Example #1
0
        def push_first_part_to_sql(streets, table_name, drop_prev):
            """
            Build the primary-name/variation table from `streets` and write
            it to SQL as `table_name`.

            `streets` is grouped by 'sc5'.  Within each group, rows whose
            'primary_flag' is not 'P' are treated as name variations and the
            first remaining row supplies the primary name.  One output row is
            produced per variation, with the columns 'variation',
            'full_variation', 'primary_name' and 'sc5'.  A group without any
            variation rows contributes no output rows; a group without a
            primary row raises IndexError.

            When `drop_prev` is true, an existing `table_name` is dropped
            first.  `table_name` is interpolated directly into that
            statement, so it must be a trusted identifier.

            Relies on `conn`, `cur`, `routing_eng` and `pd` from the
            enclosing scope.  Always returns True.

                SOME OF THE ADJUSTMENTS MADE WERE IN REGARD TO:
                    --WEST 49 STREET
                    --avenue b vs. b avenue
                    --WEST 160 STREET
                    --BENNETT AVENUE
                    --WADSWORTH TERRACE
                    --75 PARK TERRACE EAST
                    --MARGINAL STREET
                    --AVENUE OF THE AMER
                    --8 LITTLE WEST 12 ST
                    --74 PIKE SLIP

                Do anything about?
                    CENTRAL PARK WEST -- Central Park W or Central Pk W
                    NORTH END AVE -- N End Ave. or North End Ave.


                NEED 'cust_snd' FOR THE FOLLOWING

                    PLCE --> PLACE		"WASHINGTON PLCE"
                    S --> SOUTH ST
                    W --> WEST ST
                    FREDERICK DOUGLASS B --> F.D. BLVD

            """

            if drop_prev:
                # NOTE(review): level 0 is presumably the driver's autocommit
                # mode, so the DROP is applied immediately -- confirm.
                conn.set_isolation_level(0)
                cur.execute('drop table if exists %s;' % table_name)

            df_cols = ['primary_name', 'variation', 'full_variation']
            # The empty template stays first so the combined frame is built
            # from the same starting columns as the former append() chain.
            frames = [pd.DataFrame(columns=df_cols)]

            # Iterating the GroupBy yields (key, sub-frame) pairs directly,
            # which avoids the Python-2-only dict.iteritems() and a second
            # get_group() lookup per key.
            for sc5, grp in streets.groupby('sc5'):
                # Label-based .loc replaces the removed .ix indexer; every
                # label used below is taken from grp's own index.
                variation_idx = grp.index[grp.primary_flag != 'P']
                primary_idx = grp.index[~grp.index.isin(variation_idx)]

                tdf = pd.DataFrame()
                tdf['variation'] = grp.loc[variation_idx, 'stname'].tolist()
                tdf['full_variation'] = grp.loc[variation_idx,
                                                'full_stname'].tolist()
                # Scalars broadcast over however many variation rows exist.
                tdf['primary_name'] = grp.loc[primary_idx,
                                              'full_stname'].tolist()[0]
                tdf['sc5'] = sc5

                # Every row of a group must carry the one street code.
                assert grp['sc5'].nunique() == 1
                frames.append(tdf)

            # One concat instead of re-copying the accumulated frame on every
            # iteration.
            df = pd.concat(frames, ignore_index=True)
            df.to_sql(table_name, routing_eng, index=False)
            return True
Example #2
0
        def push_first_part_to_sql(streets, table_name, drop_prev):
            """
            Build the primary-name/variation table from `streets` and write
            it to SQL as `table_name`.

            `streets` is grouped by 'sc5'.  Within each group, rows whose
            'primary_flag' is not 'P' are treated as name variations and the
            first remaining row supplies the primary name.  One output row is
            produced per variation, with the columns 'variation',
            'full_variation', 'primary_name' and 'sc5'.  A group without any
            variation rows contributes no output rows; a group without a
            primary row raises IndexError.

            When `drop_prev` is true, an existing `table_name` is dropped
            first.  `table_name` is interpolated directly into that
            statement, so it must be a trusted identifier.

            Relies on `conn`, `cur`, `routing_eng` and `pd` from the
            enclosing scope.  Always returns True.

                SOME OF THE ADJUSTMENTS MADE WERE IN REGARD TO:
                    --WEST 49 STREET
                    --avenue b vs. b avenue
                    --WEST 160 STREET
                    --BENNETT AVENUE
                    --WADSWORTH TERRACE
                    --75 PARK TERRACE EAST
                    --MARGINAL STREET
                    --AVENUE OF THE AMER
                    --8 LITTLE WEST 12 ST
                    --74 PIKE SLIP

                Do anything about?
                    CENTRAL PARK WEST -- Central Park W or Central Pk W
                    NORTH END AVE -- N End Ave. or North End Ave.


                NEED 'cust_snd' FOR THE FOLLOWING

                    PLCE --> PLACE		"WASHINGTON PLCE"
                    S --> SOUTH ST
                    W --> WEST ST
                    FREDERICK DOUGLASS B --> F.D. BLVD

            """

            if drop_prev:
                # NOTE(review): level 0 is presumably the driver's autocommit
                # mode, so the DROP is applied immediately -- confirm.
                conn.set_isolation_level(0)
                cur.execute('drop table if exists %s;' % table_name)

            df_cols = ['primary_name', 'variation', 'full_variation']
            # The empty template stays first so the combined frame is built
            # from the same starting columns as the former append() chain.
            frames = [pd.DataFrame(columns=df_cols)]

            # Iterating the GroupBy yields (key, sub-frame) pairs directly,
            # which avoids the Python-2-only dict.iteritems() and a second
            # get_group() lookup per key.
            for sc5, grp in streets.groupby('sc5'):
                # Label-based .loc replaces the removed .ix indexer; every
                # label used below is taken from grp's own index.
                variation_idx = grp.index[grp.primary_flag != 'P']
                primary_idx = grp.index[~grp.index.isin(variation_idx)]

                tdf = pd.DataFrame()
                tdf['variation'] = grp.loc[variation_idx, 'stname'].tolist()
                tdf['full_variation'] = grp.loc[variation_idx,
                                                'full_stname'].tolist()
                # Scalars broadcast over however many variation rows exist.
                tdf['primary_name'] = grp.loc[primary_idx,
                                              'full_stname'].tolist()[0]
                tdf['sc5'] = sc5

                # Every row of a group must carry the one street code.
                assert grp['sc5'].nunique() == 1
                frames.append(tdf)

            # One concat instead of re-copying the accumulated frame on every
            # iteration.
            df = pd.concat(frames, ignore_index=True)
            df.to_sql(table_name, routing_eng, index=False)
            return True
Example #3
0
    def load_parsed_snd_datafile_into_db(self,
                                         table_name='snd',
                                         drop_prev=True):
        """
        Load the parsed SND CSV files, filter them, and push two tables to
        SQL.

        Reads SND_NON_S_PATH and SND_S_PATH, keeps only rows with boro == 1,
        drops the unwanted GFT / roadbed / progenitor rows, then writes:

          * `table_name`          -- primary names and their variations,
                                     built from the non-S-type records;
          * `table_name` + '_tmp' -- S-type records whose 'sc5_1' matches a
                                     street code kept in the first table
                                     (without the 'progen_gft_1' column).

        When `drop_prev` is true, existing tables with those names are
        dropped first.  The many `assert` statements are the author's sanity
        checks on the input data.  Returns None.

        NOTE(review): the method returns right after the second table is
        written.  The SQL post-processing that follows that `return` is
        unreachable and has been left exactly as it was.
        """
        py_path.append(os_path.join(os_environ['BD'], 'html'))
        from scrape_vendors import Scrape_Vendors, conn, cur, routing_eng
        # NOTE(review): SV and T are not used by the code visible here; they
        # are kept in case constructing Scrape_Vendors has side effects.
        SV = Scrape_Vendors()
        T = SV.T

        def push_first_part_to_sql(streets, table_name, drop_prev):
            """
            Build the primary-name/variation table from `streets` and write
            it to SQL as `table_name`.

            `streets` is grouped by 'sc5'.  Within each group, rows whose
            'primary_flag' is not 'P' are treated as name variations and the
            first remaining row supplies the primary name.  One output row is
            produced per variation, with the columns 'variation',
            'full_variation', 'primary_name' and 'sc5'.  A group without any
            variation rows contributes no output rows; a group without a
            primary row raises IndexError.

            When `drop_prev` is true, an existing `table_name` is dropped
            first.  `table_name` is interpolated directly into that
            statement, so it must be a trusted identifier.  Always returns
            True.

                SOME OF THE ADJUSTMENTS MADE WERE IN REGARD TO:
                    --WEST 49 STREET
                    --avenue b vs. b avenue
                    --WEST 160 STREET
                    --BENNETT AVENUE
                    --WADSWORTH TERRACE
                    --75 PARK TERRACE EAST
                    --MARGINAL STREET
                    --AVENUE OF THE AMER
                    --8 LITTLE WEST 12 ST
                    --74 PIKE SLIP

                Do anything about?
                    CENTRAL PARK WEST -- Central Park W or Central Pk W
                    NORTH END AVE -- N End Ave. or North End Ave.


                NEED 'cust_snd' FOR THE FOLLOWING

                    PLCE --> PLACE		"WASHINGTON PLCE"
                    S --> SOUTH ST
                    W --> WEST ST
                    FREDERICK DOUGLASS B --> F.D. BLVD

            """

            if drop_prev:
                # NOTE(review): level 0 is presumably the driver's autocommit
                # mode, so the DROP is applied immediately -- confirm.
                conn.set_isolation_level(0)
                cur.execute('drop table if exists %s;' % table_name)

            df_cols = ['primary_name', 'variation', 'full_variation']
            # The empty template stays first so the combined frame is built
            # from the same starting columns as the former append() chain.
            frames = [pd.DataFrame(columns=df_cols)]

            # Iterating the GroupBy yields (key, sub-frame) pairs directly,
            # which avoids the Python-2-only dict.iteritems() and a second
            # get_group() lookup per key.
            for sc5, grp in streets.groupby('sc5'):
                # Label-based .loc replaces the removed .ix indexer; every
                # label used below is taken from grp's own index.
                variation_idx = grp.index[grp.primary_flag != 'P']
                primary_idx = grp.index[~grp.index.isin(variation_idx)]

                tdf = pd.DataFrame()
                tdf['variation'] = grp.loc[variation_idx, 'stname'].tolist()
                tdf['full_variation'] = grp.loc[variation_idx,
                                                'full_stname'].tolist()
                # Scalars broadcast over however many variation rows exist.
                tdf['primary_name'] = grp.loc[primary_idx,
                                              'full_stname'].tolist()[0]
                tdf['sc5'] = sc5

                # Every row of a group must carry the one street code.
                assert grp['sc5'].nunique() == 1
                frames.append(tdf)

            # One concat instead of re-copying the accumulated frame on every
            # iteration.
            df = pd.concat(frames, ignore_index=True)
            df.to_sql(table_name, routing_eng, index=False)
            return True

        # Geographic Feature Types (GFT) removed from BOTH input files.
        remove_gft_features = ['B', 'C', 'J', 'O', 'R']

        # I.  NON S-TYPE RECORDS
        d = pd.read_csv(SND_NON_S_PATH, index_col=0)
        d = d.drop(d.index[d.boro != 1], axis=0)

        #   1. PROVE ONLY MN STREETS ARE CONSIDERED
        assert len(d.boro.unique().tolist()) == 1
        assert d.boro.unique().tolist()[0] == 1
        #   2. Remove non-essential Geographic Feature Types (GFT)
        d = d.drop(d.index[d.GFT.isin(remove_gft_features)], axis=0)
        assert len(d[d.GFT.isin(remove_gft_features)]) == 0
        #   3. PROVE ALL STREET NAMES ARE UPPER CASE
        d['stname'] = d['stname'].map(lambda s: s.upper())
        assert len(d[d.stname.str.match('[a-z]+')]) == 0
        #   4. Remove Roadbeds (Horizontal Typology Type Code (ht_name_type_code='R')
        d = d.drop(d.index[d.ht_name_type_code == 'R'], axis=0)
        assert len(d[d.ht_name_type_code == 'R']) == 0

        # II. S-TYPE RECORDS
        dd = pd.read_csv(SND_S_PATH, index_col=0)
        dd = dd.drop(dd.index[dd.boro != 1], axis=0)

        #   1. PROVE ONLY MN STREETS ARE CONSIDERED
        assert len(dd.boro.unique().tolist()) == 1
        assert dd.boro.unique().tolist()[0] == 1
        #   2. Remove non-essential Geographic Feature Types (GFT)
        dd = dd.drop(dd.index[dd.GFT.isin(remove_gft_features)], axis=0)
        assert len(dd[dd.GFT.isin(remove_gft_features)]) == 0
        #   3. PROVE ALL STREET NAMES ARE UPPER CASE
        dd['stname'] = dd['stname'].map(lambda s: s.upper())
        assert len(dd[dd.stname.str.match('[a-z]+')]) == 0
        #   4. Remove non-essential Geographic Feature Types (GFT) from progenitors [progen_gft_1=='Z']
        dd = dd.drop(dd.index[dd.progen_gft_1 == 'Z'], axis=0)
        assert len(dd[dd.progen_gft_1 == 'Z']) == 0

        ##
        # START STREET DATAFRAME
        ##

        # 1. Take First Part of Data from non-type-S records
        streets = d.copy()
        # PROVE ALL NAP'S WERE REMOVED
        streets = streets.drop(streets.index[streets.GFT.isin(['N', 'X'])],
                               axis=0)
        assert len(streets[streets.GFT.isin(['N', 'X'])]) == 0

        # print len(dd),'initial rows from S-Type records'
        # 2. Supplement with Data from type-S records
        uniq_street_sc5 = streets.sc5.unique().tolist()
        # NOTE(review): the previous condition OR-ed this same sc5_1 test
        # with itself; the second operand may have been meant to be sc5_2.
        # Only the redundancy is removed here, because widening the match
        # would change which rows reach the progen_gft_1 check below.
        nd = dd.index[dd.sc5_1.isin(uniq_street_sc5)]
        ndf = dd.loc[nd, :].copy()
        # print len(ndf),'remaining rows from S-Type records after taking only matching sc5'

        # -  Remove Blank Columns from Supplemental Data
        # ---- PROVE THAT OK TO REMOVE 'progen_gft_1' b/c NO VALUES EXIST
        test_col = 'progen_gft_1'
        t = ndf[test_col].unique().tolist()
        # The column must hold exactly one distinct value and that value must
        # not be an integral float (NaN satisfies this).  Built-in float()
        # stands in for the removed np.float alias.
        assert len(t) == 1 and not float(t[0]).is_integer()
        # --
        ndf = ndf.drop([test_col], axis=1)

        # print len(ndf),'remaining rows before push'
        ##
        # PUSH TO SQL
        ##

        push_first_part_to_sql(streets, table_name, drop_prev)
        tmp_table = table_name + '_tmp'
        if drop_prev:
            conn.set_isolation_level(0)
            cur.execute('drop table if exists %(tmp_tbl)s;' %
                        {'tmp_tbl': tmp_table})
        ndf.to_sql(tmp_table, routing_eng, index=False)

        # NOTE(review): nothing below this `return` ever runs.  It is kept
        # verbatim for reference and has NOT been updated: it still uses
        # .ix / iteritems() / DataFrame.sort(), and the regexp_replace update
        # just before "drop table if exists tmp_snd" lacks its terminating
        # semicolon.
        return
        # PG SQL CMDS...
        cmd = """

                alter table snd
                    add column east boolean default false,
                    add column south boolean default false,
                    add column west boolean default false,
                    add column north boolean default false,
                    add column sc5_2 bigint,
                    add column stname_grp text[];

                -- 276 distinct sc5_1 in _tmp
                -- 221 rows for below (276 without regex exclusions)

                update snd _orig set stname_grp = name_grp
                from

                (select array_agg(t.stname) name_grp,f2.s_sc5 s_sc5
                    from
                        (select array_agg(distinct a.variation) orig_variations from snd a) as f3,
                        snd_tmp t,
                        (select distinct s.sc5_1 s_sc5 from snd_tmp s) as f2
                    where t.sc5_1 = f2.s_sc5
                    and not (orig_variations && array[t.stname] )
                    and not (t.stname ilike '%roadbed%' or t.stname ilike '%EXTENSION%'
                        or t.stname ilike '%PEDESTRIAN%' or t.stname ilike '%SIDE HW%' )

                    group by f2.s_sc5) as f1
                where s_sc5 = _orig.sc5::bigint;

                -- 454 rows in snd with non-null stname_grp

                insert into snd (variation,primary_name,sc5)
                select variation,primary_name,sc5
                from
                    (select distinct unnest(n.stname_grp) variation,
                        n.primary_name primary_name,
                        n.sc5 sc5 from snd n) as f1,

                    (select array_agg(full_variation) all_full_varies,
                        array_agg(variation) all_varies,
                        array_agg(primary_name) all_primaries from snd t) as f2
                where not (all_full_varies && array[variation] OR all_varies && array[variation] OR all_primaries && array[variation]);

                -- ASSERT -- res==True
                --select all_vars=uniq_vars res
                --from
                --    (select count(n1.variation) all_vars from snd n1 where n1.variation is not null or n1.variation !='') as f1,
                --    (select count(distinct n2.variation) uniq_vars from snd n2 where n2.variation is not null or n2.variation !='') as f2;

                update snd n set east = true
                from
                    (select t.progen_word_1 t_progen_word_1,
                        t.progen_word_2 t_progen_word_2,
                        t.sc5_1 t_sc5_1,
                        t.sc5_2 t_sc5_2
                    from snd_tmp t) as f1
                where ( (n.sc5 = t_sc5_1 or n.sc5 = t_sc5_2) OR  (n.sc5_2 = t_sc5_1 or n.sc5_2 = t_sc5_2) )
                and (t_progen_word_1 = 'E' or t_progen_word_2 = 'E');


                update snd n set west = true
                from
                    (select t.progen_word_1 t_progen_word_1,
                        t.progen_word_2 t_progen_word_2,
                        t.sc5_1 t_sc5_1,
                        t.sc5_2 t_sc5_2
                    from snd_tmp t) as f1
                where ( (n.sc5 = t_sc5_1 or n.sc5 = t_sc5_2) OR  (n.sc5_2 = t_sc5_1 or n.sc5_2 = t_sc5_2) )
                and (t_progen_word_1 = 'W' or t_progen_word_2 = 'W');

                -- ASSERT -- len({below}) == 0
                --select progen_word_1 from snd_tmp where progen_word_1 ilike 'w';

                -- ASSERT -- len({below}) == 0
                --select progen_word_2 from snd_tmp where progen_word_2 ilike 'e';


                update snd set primary_name = regexp_replace(primary_name,
                    '^(EAST|WEST)([\s]+)([0-9]+)\s(.*)$','\\1 \\3 \\4','g');
                update snd set full_variation = regexp_replace(full_variation,
                    '^(EAST|WEST)([\s]+)([0-9]+)\s(.*)$','\\1 \\3 \\4','g');
                update snd set variation = regexp_replace(variation,
                    '^(EAST|WEST)([\s]+)([0-9]+)\s(.*)$','\\1 \\3 \\4','g');
                update snd set full_variation = regexp_replace(full_variation,
                    '^(TRANSVRS|CPE|CPW|DOUGLASS|RIIS|RISS|NY|PATH|VLADECK|NEW)([\s]+)([0-9]+)\s(.*)$','\\1 \\3 \\4','g');
                update snd set variation = regexp_replace(variation,
                    '^(TRANSVRS|CPE|CPW|DOUGLASS|RIIS|RISS|NY|PATH|VLADECK|NEW)([\s]+)([0-9]+)\s(.*)$','\\1 \\3 \\4','g');

                update snd set primary_name = 'FDR DRIVE' where primary_name = 'F D R DRIVE';

                -- ASSERT -- len({below}) == 0
                --select count(*)=0 res from snd where variation!=full_variation;
                alter table snd drop column if exists full_variation;

                --PROVE ALL STREETNAMES AND STREET IDS ARE FOUND IN 'snd'
                -- ASSERT -- res==True
                --select count(*)=0 res from
                --    snd_tmp t,
                --    (select array_agg(n.sc5) all_sc5 from snd n) as f1,
                --    (select array_agg(n2.sc5_2) all_sc5_2 from snd n2) as f2,
                --    (select array_agg(n3.variation) all_names from snd n3) as f3
                --where NOT ( t.sc5_1::bigint = ANY (all_sc5) OR t.sc5_1::bigint = ANY (all_sc5_2) )
                --and NOT ( t.sc5_2::bigint = ANY (all_sc5) OR t.sc5_2::bigint = ANY (all_sc5_2) )
                --and NOT ( t.stname = ANY (all_names) );
                drop table if exists snd_tmp;

                -- ASSERT
                --select count(distinct sc5_2)=0 res from snd
                alter table snd drop column sc5_2;


                update snd set last_updated = 'now'::timestamp with time zone;

                update snd s set primary_name = regexp_replace(s.primary_name,
                    '([a-zA-Z0-9])[\s\s]+([a-zA-Z0-9]*)','\\1 \\2','g')

                drop table if exists tmp_snd;

                insert into snd (variation,sc5,west,east)
                select distinct on (s1.primary_name) s1.primary_name variation,s1.sc5 sc5,s1.west west,s1.east east
                from
                    snd s1,
                    (select array_agg(s2.variation) all_variations from snd s2) as f1
                where not s1.primary_name = ANY (all_variations);

                --PROVE THAT ALL stname_grp NAMES ARE IN VARIATION COLUMN
                --select count(*)=0 res from
                --    (select unnest(s.stname_grp) all_grp_names from snd s where s.stname_grp is not null) as f1,
                --    (select array_agg(n3.variation) all_names from snd n3) as f3
                --where NOT ( all_grp_names = ANY (all_names) );
                alter table snd drop column stname_grp;

                alter table snd add column tmp bigint;
                update snd set tmp = sc5::bigint;
                alter table snd drop column sc5;
                alter table snd rename column tmp to sc5;


                update snd s set
                    primary_name = variation,
                    last_updated = 'now'::timestamp with time zone
                where last_updated is null;

                """
        conn.set_isolation_level(0)
        cur.execute(cmd)

        d = pd.read_sql("select * from snd", routing_eng)
        d = d.sort('primary_name').reset_index(drop=True)
        l_funct = lambda s: 0 if len(str(s).strip()) == 0 else int(s)
        d['sc5'] = d.sc5.map(l_funct)

        cols = ['pattern', 'repl', '_flags']
        ndf = pd.DataFrame(columns=cols)
        grp = d.groupby('primary_name')
        for k, v in grp.groups.iteritems():
            patt = '(' + ' | '.join(d.ix[v, 'variation'].tolist()) + ')'
            ndf = ndf.append(dict(zip(cols, [patt, k, 'g'])),
                             ignore_index=True)

        ndf['repl'] = ndf.repl.str.replace(
            r'([a-zA-Z0-9]*)([\s\s]+)([a-zA-Z0-9]*)', r'\g<1> \g<3>')
        ndf.to_sql('tmp_snd', routing_eng, index=False)

        a = """

            select s.variation,s.primary_name
            from snd s,(select array_agg(t.address) all_addr from pluto p where p.geom is null) as f1
            where s.variation = ANY (all_addr);

            select sl_addr
            from
            (select array_agg(s.variation) all_variations from snd s) as f1,
            (select sl.address sl_addr from seamless sl where geom is null) as f2
            where sl_addr = ANY (all_variations);

        """

        # LOTS OF COMMANDS SHOULD BE HERE (FROM TAIL PART OF LONGSTRING ABOVE)
        # PROVE THAT TABLE IS IN ORIGINAL CONDITION
        saved_col_type_d_snd = {
            u'east': u'boolean',
            u'last_updated': u'timestamp with time zone',
            u'primary_name': u'text',
            u'sc5': u'bigint',
            u'uid': u'integer',
            u'variation': u'text',
            u'west': u'boolean'
        }
        x = pd.read_sql(
            """   select column_name, data_type
                            from INFORMATION_SCHEMA.COLUMNS
                            where table_name = 'snd'""", routing_eng)
        col_type_d = dict(zip(x.column_name.tolist(), x.data_type.tolist()))
        assert col_type_d == saved_col_type_d_snd

        return
Example #4
0
    def load_parsed_snd_datafile_into_db(self, table_name='snd', drop_prev=True):
        """
        Load the parsed SND CSV files, filter them, and push two tables to
        SQL.

        Reads SND_NON_S_PATH and SND_S_PATH, keeps only rows with boro == 1,
        drops the unwanted GFT / roadbed / progenitor rows, then writes:

          * `table_name`          -- primary names and their variations,
                                     built from the non-S-type records;
          * `table_name` + '_tmp' -- S-type records whose 'sc5_1' matches a
                                     street code kept in the first table
                                     (without the 'progen_gft_1' column).

        When `drop_prev` is true, existing tables with those names are
        dropped first.  The many `assert` statements are the author's sanity
        checks on the input data.  Returns None.

        NOTE(review): the method returns right after the second table is
        written.  The SQL post-processing that follows that `return` is
        unreachable and has been left exactly as it was.
        """
        py_path.append(os_path.join(os_environ['BD'], 'html'))
        from scrape_vendors import Scrape_Vendors, conn, cur, routing_eng
        # NOTE(review): SV and T are not used by the code visible here; they
        # are kept in case constructing Scrape_Vendors has side effects.
        SV = Scrape_Vendors()
        T = SV.T

        def push_first_part_to_sql(streets, table_name, drop_prev):
            """
            Build the primary-name/variation table from `streets` and write
            it to SQL as `table_name`.

            `streets` is grouped by 'sc5'.  Within each group, rows whose
            'primary_flag' is not 'P' are treated as name variations and the
            first remaining row supplies the primary name.  One output row is
            produced per variation, with the columns 'variation',
            'full_variation', 'primary_name' and 'sc5'.  A group without any
            variation rows contributes no output rows; a group without a
            primary row raises IndexError.

            When `drop_prev` is true, an existing `table_name` is dropped
            first.  `table_name` is interpolated directly into that
            statement, so it must be a trusted identifier.  Always returns
            True.

                SOME OF THE ADJUSTMENTS MADE WERE IN REGARD TO:
                    --WEST 49 STREET
                    --avenue b vs. b avenue
                    --WEST 160 STREET
                    --BENNETT AVENUE
                    --WADSWORTH TERRACE
                    --75 PARK TERRACE EAST
                    --MARGINAL STREET
                    --AVENUE OF THE AMER
                    --8 LITTLE WEST 12 ST
                    --74 PIKE SLIP

                Do anything about?
                    CENTRAL PARK WEST -- Central Park W or Central Pk W
                    NORTH END AVE -- N End Ave. or North End Ave.


                NEED 'cust_snd' FOR THE FOLLOWING

                    PLCE --> PLACE		"WASHINGTON PLCE"
                    S --> SOUTH ST
                    W --> WEST ST
                    FREDERICK DOUGLASS B --> F.D. BLVD

            """

            if drop_prev:
                # NOTE(review): level 0 is presumably the driver's autocommit
                # mode, so the DROP is applied immediately -- confirm.
                conn.set_isolation_level(0)
                cur.execute('drop table if exists %s;' % table_name)

            df_cols = ['primary_name', 'variation', 'full_variation']
            # The empty template stays first so the combined frame is built
            # from the same starting columns as the former append() chain.
            frames = [pd.DataFrame(columns=df_cols)]

            # Iterating the GroupBy yields (key, sub-frame) pairs directly,
            # which avoids the Python-2-only dict.iteritems() and a second
            # get_group() lookup per key.
            for sc5, grp in streets.groupby('sc5'):
                # Label-based .loc replaces the removed .ix indexer; every
                # label used below is taken from grp's own index.
                variation_idx = grp.index[grp.primary_flag != 'P']
                primary_idx = grp.index[~grp.index.isin(variation_idx)]

                tdf = pd.DataFrame()
                tdf['variation'] = grp.loc[variation_idx, 'stname'].tolist()
                tdf['full_variation'] = grp.loc[variation_idx,
                                                'full_stname'].tolist()
                # Scalars broadcast over however many variation rows exist.
                tdf['primary_name'] = grp.loc[primary_idx,
                                              'full_stname'].tolist()[0]
                tdf['sc5'] = sc5

                # Every row of a group must carry the one street code.
                assert grp['sc5'].nunique() == 1
                frames.append(tdf)

            # One concat instead of re-copying the accumulated frame on every
            # iteration.
            df = pd.concat(frames, ignore_index=True)
            df.to_sql(table_name, routing_eng, index=False)
            return True

        # Geographic Feature Types (GFT) removed from BOTH input files.
        remove_gft_features = ['B', 'C', 'J', 'O', 'R']

        # I.  NON S-TYPE RECORDS
        d = pd.read_csv(SND_NON_S_PATH, index_col=0)
        d = d.drop(d.index[d.boro != 1], axis=0)

        #   1. PROVE ONLY MN STREETS ARE CONSIDERED
        assert len(d.boro.unique().tolist()) == 1
        assert d.boro.unique().tolist()[0] == 1
        #   2. Remove non-essential Geographic Feature Types (GFT)
        d = d.drop(d.index[d.GFT.isin(remove_gft_features)], axis=0)
        assert len(d[d.GFT.isin(remove_gft_features)]) == 0
        #   3. PROVE ALL STREET NAMES ARE UPPER CASE
        d['stname'] = d['stname'].map(lambda s: s.upper())
        assert len(d[d.stname.str.match('[a-z]+')]) == 0
        #   4. Remove Roadbeds (Horizontal Typology Type Code (ht_name_type_code='R')
        d = d.drop(d.index[d.ht_name_type_code == 'R'], axis=0)
        assert len(d[d.ht_name_type_code == 'R']) == 0

        # II. S-TYPE RECORDS
        dd = pd.read_csv(SND_S_PATH, index_col=0)
        dd = dd.drop(dd.index[dd.boro != 1], axis=0)

        #   1. PROVE ONLY MN STREETS ARE CONSIDERED
        assert len(dd.boro.unique().tolist()) == 1
        assert dd.boro.unique().tolist()[0] == 1
        #   2. Remove non-essential Geographic Feature Types (GFT)
        dd = dd.drop(dd.index[dd.GFT.isin(remove_gft_features)], axis=0)
        assert len(dd[dd.GFT.isin(remove_gft_features)]) == 0
        #   3. PROVE ALL STREET NAMES ARE UPPER CASE
        dd['stname'] = dd['stname'].map(lambda s: s.upper())
        assert len(dd[dd.stname.str.match('[a-z]+')]) == 0
        #   4. Remove non-essential Geographic Feature Types (GFT) from progenitors [progen_gft_1=='Z']
        dd = dd.drop(dd.index[dd.progen_gft_1 == 'Z'], axis=0)
        assert len(dd[dd.progen_gft_1 == 'Z']) == 0

        ##
        # START STREET DATAFRAME
        ##

        # 1. Take First Part of Data from non-type-S records
        streets = d.copy()
        # PROVE ALL NAP'S WERE REMOVED
        streets = streets.drop(streets.index[streets.GFT.isin(['N', 'X'])],
                               axis=0)
        assert len(streets[streets.GFT.isin(['N', 'X'])]) == 0

        # print len(dd),'initial rows from S-Type records'
        # 2. Supplement with Data from type-S records
        uniq_street_sc5 = streets.sc5.unique().tolist()
        # NOTE(review): the previous condition OR-ed this same sc5_1 test
        # with itself; the second operand may have been meant to be sc5_2.
        # Only the redundancy is removed here, because widening the match
        # would change which rows reach the progen_gft_1 check below.
        nd = dd.index[dd.sc5_1.isin(uniq_street_sc5)]
        ndf = dd.loc[nd, :].copy()
        # print len(ndf),'remaining rows from S-Type records after taking only matching sc5'

        # -  Remove Blank Columns from Supplemental Data
        # ---- PROVE THAT OK TO REMOVE 'progen_gft_1' b/c NO VALUES EXIST
        test_col = 'progen_gft_1'
        t = ndf[test_col].unique().tolist()
        # The column must hold exactly one distinct value and that value must
        # not be an integral float (NaN satisfies this).  Built-in float()
        # stands in for the removed np.float alias.
        assert len(t) == 1 and not float(t[0]).is_integer()
        # --
        ndf = ndf.drop([test_col], axis=1)

        # print len(ndf),'remaining rows before push'
        ##
        # PUSH TO SQL
        ##

        push_first_part_to_sql(streets, table_name, drop_prev)
        tmp_table = table_name + '_tmp'
        if drop_prev:
            conn.set_isolation_level(0)
            cur.execute('drop table if exists %(tmp_tbl)s;' %
                        {'tmp_tbl': tmp_table})
        ndf.to_sql(tmp_table, routing_eng, index=False)

        # NOTE(review): nothing below this `return` ever runs.  It is kept
        # verbatim for reference and has NOT been updated: it still uses
        # .ix / iteritems() / DataFrame.sort(), and the regexp_replace update
        # just before "drop table if exists tmp_snd" lacks its terminating
        # semicolon.
        return
        # PG SQL CMDS...
        cmd =   """

                alter table snd
                    add column east boolean default false,
                    add column south boolean default false,
                    add column west boolean default false,
                    add column north boolean default false,
                    add column sc5_2 bigint,
                    add column stname_grp text[];

                -- 276 distinct sc5_1 in _tmp
                -- 221 rows for below (276 without regex exclusions)

                update snd _orig set stname_grp = name_grp
                from

                (select array_agg(t.stname) name_grp,f2.s_sc5 s_sc5
                    from
                        (select array_agg(distinct a.variation) orig_variations from snd a) as f3,
                        snd_tmp t,
                        (select distinct s.sc5_1 s_sc5 from snd_tmp s) as f2
                    where t.sc5_1 = f2.s_sc5
                    and not (orig_variations && array[t.stname] )
                    and not (t.stname ilike '%roadbed%' or t.stname ilike '%EXTENSION%'
                        or t.stname ilike '%PEDESTRIAN%' or t.stname ilike '%SIDE HW%' )

                    group by f2.s_sc5) as f1
                where s_sc5 = _orig.sc5::bigint;

                -- 454 rows in snd with non-null stname_grp

                insert into snd (variation,primary_name,sc5)
                select variation,primary_name,sc5
                from
                    (select distinct unnest(n.stname_grp) variation,
                        n.primary_name primary_name,
                        n.sc5 sc5 from snd n) as f1,

                    (select array_agg(full_variation) all_full_varies,
                        array_agg(variation) all_varies,
                        array_agg(primary_name) all_primaries from snd t) as f2
                where not (all_full_varies && array[variation] OR all_varies && array[variation] OR all_primaries && array[variation]);

                -- ASSERT -- res==True
                --select all_vars=uniq_vars res
                --from
                --    (select count(n1.variation) all_vars from snd n1 where n1.variation is not null or n1.variation !='') as f1,
                --    (select count(distinct n2.variation) uniq_vars from snd n2 where n2.variation is not null or n2.variation !='') as f2;

                update snd n set east = true
                from
                    (select t.progen_word_1 t_progen_word_1,
                        t.progen_word_2 t_progen_word_2,
                        t.sc5_1 t_sc5_1,
                        t.sc5_2 t_sc5_2
                    from snd_tmp t) as f1
                where ( (n.sc5 = t_sc5_1 or n.sc5 = t_sc5_2) OR  (n.sc5_2 = t_sc5_1 or n.sc5_2 = t_sc5_2) )
                and (t_progen_word_1 = 'E' or t_progen_word_2 = 'E');


                update snd n set west = true
                from
                    (select t.progen_word_1 t_progen_word_1,
                        t.progen_word_2 t_progen_word_2,
                        t.sc5_1 t_sc5_1,
                        t.sc5_2 t_sc5_2
                    from snd_tmp t) as f1
                where ( (n.sc5 = t_sc5_1 or n.sc5 = t_sc5_2) OR  (n.sc5_2 = t_sc5_1 or n.sc5_2 = t_sc5_2) )
                and (t_progen_word_1 = 'W' or t_progen_word_2 = 'W');

                -- ASSERT -- len({below}) == 0
                --select progen_word_1 from snd_tmp where progen_word_1 ilike 'w';

                -- ASSERT -- len({below}) == 0
                --select progen_word_2 from snd_tmp where progen_word_2 ilike 'e';


                update snd set primary_name = regexp_replace(primary_name,
                    '^(EAST|WEST)([\s]+)([0-9]+)\s(.*)$','\\1 \\3 \\4','g');
                update snd set full_variation = regexp_replace(full_variation,
                    '^(EAST|WEST)([\s]+)([0-9]+)\s(.*)$','\\1 \\3 \\4','g');
                update snd set variation = regexp_replace(variation,
                    '^(EAST|WEST)([\s]+)([0-9]+)\s(.*)$','\\1 \\3 \\4','g');
                update snd set full_variation = regexp_replace(full_variation,
                    '^(TRANSVRS|CPE|CPW|DOUGLASS|RIIS|RISS|NY|PATH|VLADECK|NEW)([\s]+)([0-9]+)\s(.*)$','\\1 \\3 \\4','g');
                update snd set variation = regexp_replace(variation,
                    '^(TRANSVRS|CPE|CPW|DOUGLASS|RIIS|RISS|NY|PATH|VLADECK|NEW)([\s]+)([0-9]+)\s(.*)$','\\1 \\3 \\4','g');

                update snd set primary_name = 'FDR DRIVE' where primary_name = 'F D R DRIVE';

                -- ASSERT -- len({below}) == 0
                --select count(*)=0 res from snd where variation!=full_variation;
                alter table snd drop column if exists full_variation;

                --PROVE ALL STREETNAMES AND STREET IDS ARE FOUND IN 'snd'
                -- ASSERT -- res==True
                --select count(*)=0 res from
                --    snd_tmp t,
                --    (select array_agg(n.sc5) all_sc5 from snd n) as f1,
                --    (select array_agg(n2.sc5_2) all_sc5_2 from snd n2) as f2,
                --    (select array_agg(n3.variation) all_names from snd n3) as f3
                --where NOT ( t.sc5_1::bigint = ANY (all_sc5) OR t.sc5_1::bigint = ANY (all_sc5_2) )
                --and NOT ( t.sc5_2::bigint = ANY (all_sc5) OR t.sc5_2::bigint = ANY (all_sc5_2) )
                --and NOT ( t.stname = ANY (all_names) );
                drop table if exists snd_tmp;

                -- ASSERT
                --select count(distinct sc5_2)=0 res from snd
                alter table snd drop column sc5_2;


                update snd set last_updated = 'now'::timestamp with time zone;

                update snd s set primary_name = regexp_replace(s.primary_name,
                    '([a-zA-Z0-9])[\s\s]+([a-zA-Z0-9]*)','\\1 \\2','g')

                drop table if exists tmp_snd;

                insert into snd (variation,sc5,west,east)
                select distinct on (s1.primary_name) s1.primary_name variation,s1.sc5 sc5,s1.west west,s1.east east
                from
                    snd s1,
                    (select array_agg(s2.variation) all_variations from snd s2) as f1
                where not s1.primary_name = ANY (all_variations);

                --PROVE THAT ALL stname_grp NAMES ARE IN VARIATION COLUMN
                --select count(*)=0 res from
                --    (select unnest(s.stname_grp) all_grp_names from snd s where s.stname_grp is not null) as f1,
                --    (select array_agg(n3.variation) all_names from snd n3) as f3
                --where NOT ( all_grp_names = ANY (all_names) );
                alter table snd drop column stname_grp;

                alter table snd add column tmp bigint;
                update snd set tmp = sc5::bigint;
                alter table snd drop column sc5;
                alter table snd rename column tmp to sc5;


                update snd s set
                    primary_name = variation,
                    last_updated = 'now'::timestamp with time zone
                where last_updated is null;

                """
        conn.set_isolation_level(           0)
        cur.execute(                        cmd)

        d = pd.read_sql("select * from snd",routing_eng)
        d = d.sort('primary_name').reset_index(drop=True)
        l_funct = lambda s: 0 if len(str(s).strip())==0 else int(s)
        d['sc5']=d.sc5.map(l_funct)


        cols = ['pattern','repl','_flags']
        ndf = pd.DataFrame(columns=cols)
        grp = d.groupby('primary_name')
        for k,v in grp.groups.iteritems():
            patt = '('+' | '.join(d.ix[v,'variation'].tolist()) + ')'
            ndf = ndf.append(dict(zip(cols,[patt,k,'g'])),ignore_index=True)

        ndf['repl'] = ndf.repl.str.replace(r'([a-zA-Z0-9]*)([\s\s]+)([a-zA-Z0-9]*)',r'\g<1> \g<3>')
        ndf.to_sql('tmp_snd',routing_eng,index=False)

        a="""

            select s.variation,s.primary_name
            from snd s,(select array_agg(t.address) all_addr from pluto p where p.geom is null) as f1
            where s.variation = ANY (all_addr);

            select sl_addr
            from
            (select array_agg(s.variation) all_variations from snd s) as f1,
            (select sl.address sl_addr from seamless sl where geom is null) as f2
            where sl_addr = ANY (all_variations);

        """

        # LOTS OF COMMANDS SHOULD BE HERE (FROM TAIL PART OF LONGSTRING ABOVE)
        # PROVE THAT TABLE IS IN ORIGINAL CONDITION
        saved_col_type_d_snd = {u'east': u'boolean',
                                 u'last_updated': u'timestamp with time zone',
                                 u'primary_name': u'text',
                                 u'sc5': u'bigint',
                                 u'uid': u'integer',
                                 u'variation': u'text',
                                 u'west': u'boolean'}
        x=pd.read_sql("""   select column_name, data_type
                            from INFORMATION_SCHEMA.COLUMNS
                            where table_name = 'snd'""",routing_eng)
        col_type_d = dict(zip(x.column_name.tolist(),x.data_type.tolist()))
        assert col_type_d==saved_col_type_d_snd

        return