def read_places(id_starts_with):
    """Read the Ptolemy places whose ID starts with a given prefix.

    Rows without a ``ptol_lat`` value are dropped, duplicate ``ptol_id``
    rows are collapsed to their first occurrence, and the surviving places
    are left-joined against the geocode table. Every row is then tagged in
    a ``disposition`` column: ``'known'`` when ``modern_lat`` is present,
    ``'unknown'`` when it is missing.

    Args:
        id_starts_with: Prefix that ``ptol_id`` must start with.

    Returns:
        A DataFrame indexed by ``ptol_id``; ``ptol_id`` is also kept as a
        regular column.

    Raises:
        ValueError: If ``ptol_id`` is not unique after the merge (raised by
            ``set_index(..., verify_integrity=True)``).
    """
    places = sgdb.read_places()
    places = places.loc[pd.notnull(places.ptol_lat), :]
    places = places.drop_duplicates('ptol_id')
    places = places.loc[:, KEY_PLACE_FIELDNAMES]
    places = places.loc[places.ptol_id.str.startswith(id_starts_with), :]
    # No ``on=`` is given, so pandas joins on the columns both frames share.
    places = pd.merge(places, geocode.read_geocodes(), how='left')

    has_modern_coords = pd.notnull(places.modern_lat)
    places.loc[has_modern_coords, 'disposition'] = 'known'
    places.loc[~has_modern_coords, 'disposition'] = 'unknown'

    # Options are passed by keyword: recent pandas releases reject them
    # positionally, and the names make the intent readable.
    places = places.set_index('ptol_id', drop=False, verify_integrity=True)
    return places
def read_places(id_starts_with):
    """Read the Ptolemy places whose ID starts with a given prefix.

    Rows without a ``ptol_lat`` value are dropped, duplicate ``ptol_id``
    rows are collapsed to their first occurrence, and the surviving places
    are left-joined against the geocode table. Every row is then tagged in
    a ``disposition`` column: ``"known"`` when ``modern_lat`` is present,
    ``"unknown"`` when it is missing.

    The row count is printed before and after the merge so that any rows
    added by the join are visible on stdout.

    Args:
        id_starts_with: Prefix that ``ptol_id`` must start with.

    Returns:
        A DataFrame indexed by ``ptol_id``; ``ptol_id`` is also kept as a
        regular column.

    Raises:
        ValueError: If ``ptol_id`` is not unique after the merge (raised by
            ``set_index(..., verify_integrity=True)``).
    """
    places = sgdb.read_places()
    places = places.loc[pd.notnull(places.ptol_lat), :]
    places = places.drop_duplicates("ptol_id")
    places = places.loc[:, KEY_PLACE_FIELDNAMES]
    places = places.loc[places.ptol_id.str.startswith(id_starts_with), :]
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print(len(places.ptol_id))

    # No ``on=`` is given, so pandas joins on the columns both frames share.
    places = pd.merge(places, geocode.read_geocodes(), how="left")

    has_modern_coords = pd.notnull(places.modern_lat)
    places.loc[has_modern_coords, "disposition"] = "known"
    places.loc[~has_modern_coords, "disposition"] = "unknown"

    # Options are passed by keyword: recent pandas releases reject them
    # positionally, and the names make the intent readable.
    places = places.set_index("ptol_id", drop=False, verify_integrity=True)
    print(len(places.ptol_id))
    return places
'modern_name'] X_NAMES = [ 'ptol_lat', 'ptol_lon'] # book 7 contains India # chapter 1 is within the Ganges TARGET_BOOK = '7.01' places = sgdb.read_places().drop_duplicates('ptol_id') places.reindex(columns=['ptol_id']) places = places.loc[pd.notnull(places.ptol_lat), :] places = places.loc[:, KEY_PLACE_FIELDNAMES] places = places.loc[places.ptol_id.str.startswith(TARGET_BOOK), :] places = pd.merge(places, geocode.read_geocodes(), how='left') known = places.loc[pd.notnull(places.modern_lat), :] known.is_copy = False known.to_csv('../Data/regression_measure_before.csv', encoding='cp1252') loo = LeaveOneOut(len(known)) for train, test in loo: trainx = known.iloc[train, :].loc[:, X_NAMES] testx = known.iloc[test, :].loc[:, X_NAMES] lonreg = linear_model.LinearRegression() latreg = linear_model.LinearRegression() lonreg.fit(trainx, known.iloc[train, :].modern_lon) latreg.fit(trainx, known.iloc[train, :].modern_lat) known.loc[known.iloc[test, :].index, 'pred_lat'] = latreg.predict(testx) known.loc[known.iloc[test, :].index, 'pred_lon'] = lonreg.predict(testx)
# Columns retained from the raw places table.
KEY_PLACE_FIELDNAMES = [
    'ptol_id',
    'ptol_name',
    'ptol_lat',
    'ptol_lon',
    'modern_name',
]
# Predictor columns fed to the regressions.
X_NAMES = ['ptol_lat', 'ptol_lon']

# book 7 contains India
# chapter 1 is within the Ganges
TARGET_BOOK = '7.01'

# Keep one row per place, require a Ptolemaic latitude, and restrict the
# table to the target book/chapter.
places = sgdb.read_places().drop_duplicates('ptol_id')
places = places.loc[pd.notnull(places.ptol_lat), :]
places = places.loc[:, KEY_PLACE_FIELDNAMES]
places = places.loc[places.ptol_id.str.startswith(TARGET_BOOK), :]
# No ``on=`` is given, so pandas joins on the columns both frames share.
places = pd.merge(places, geocode.read_geocodes(), how='left')

# Work on an explicit copy: prediction columns are added to ``known``
# below, and writing into a slice of ``places`` would be ambiguous.
known = places.loc[pd.notnull(places.modern_lat), :].copy()
known.to_csv('../Data/regression_measure_before.csv', encoding='cp1252')

# Leave-one-out: for every known place, fit on all the others and predict
# the held-out place's modern coordinates from its Ptolemaic ones.
# NOTE(review): ``LeaveOneOut(n)`` being directly iterable looks like the
# legacy scikit-learn cross-validation interface - confirm against the
# import at the top of the file before upgrading scikit-learn.
loo = LeaveOneOut(len(known))
for train, test in loo:
    train_rows = known.iloc[train, :]
    test_rows = known.iloc[test, :]
    trainx = train_rows.loc[:, X_NAMES]
    testx = test_rows.loc[:, X_NAMES]

    lonreg = linear_model.LinearRegression()
    latreg = linear_model.LinearRegression()
    lonreg.fit(trainx, train_rows.modern_lon)
    latreg.fit(trainx, train_rows.modern_lat)

    # Store the out-of-sample prediction on the held-out row(s).
    known.loc[test_rows.index, 'pred_lat'] = latreg.predict(testx)
    known.loc[test_rows.index, 'pred_lon'] = lonreg.predict(testx)