Exemple #1
0
def read_places(id_starts_with):
    """Read the places whose ``ptol_id`` starts with ``id_starts_with``.

    Rows without a ``ptol_lat`` value are dropped, duplicate ``ptol_id``
    rows are removed, and only the ``KEY_PLACE_FIELDNAMES`` columns are
    kept.  The result is left-merged with ``geocode.read_geocodes()`` and
    gains a ``disposition`` column: ``'known'`` where ``modern_lat`` is
    present, ``'unknown'`` otherwise.

    Args:
        id_starts_with: prefix passed to ``str.startswith`` on ``ptol_id``.

    Returns:
        A DataFrame indexed by ``ptol_id``; ``ptol_id`` also remains
        available as a regular column.

    Raises:
        ValueError: if ``ptol_id`` is not unique after the merge
            (``verify_integrity=True``).
    """
    places = sgdb.read_places()
    places = places.loc[pd.notnull(places.ptol_lat), :]
    places = places.drop_duplicates('ptol_id')
    places = places.loc[:, KEY_PLACE_FIELDNAMES]
    places = places.loc[places.ptol_id.str.startswith(id_starts_with), :]
    # No explicit key: pandas joins on the columns both frames share.
    places = pd.merge(places, geocode.read_geocodes(), how='left')

    has_modern_coords = pd.notnull(places.modern_lat)
    places.loc[has_modern_coords, 'disposition'] = 'known'
    places.loc[~has_modern_coords, 'disposition'] = 'unknown'

    # Keyword arguments keep the intent readable and are mandatory on
    # recent pandas releases, where these parameters are keyword-only.
    places = places.set_index('ptol_id', drop=False, verify_integrity=True)
    return places
Exemple #2
0
def read_places(id_starts_with):
    """Read the places whose ``ptol_id`` starts with ``id_starts_with``.

    Rows without a ``ptol_lat`` value are dropped, duplicate ``ptol_id``
    rows are removed, and only the ``KEY_PLACE_FIELDNAMES`` columns are
    kept.  The result is left-merged with ``geocode.read_geocodes()`` and
    gains a ``disposition`` column: ``'known'`` where ``modern_lat`` is
    present, ``'unknown'`` otherwise.

    Args:
        id_starts_with: prefix passed to ``str.startswith`` on ``ptol_id``.

    Returns:
        A DataFrame indexed by ``ptol_id``; ``ptol_id`` also remains
        available as a regular column.

    Raises:
        ValueError: if ``ptol_id`` is not unique after the merge
            (``verify_integrity=True``).
    """
    places = sgdb.read_places()
    places = places.loc[pd.notnull(places.ptol_lat), :]
    places = places.drop_duplicates('ptol_id')
    places = places.loc[:, KEY_PLACE_FIELDNAMES]
    places = places.loc[places.ptol_id.str.startswith(id_starts_with), :]
    # No explicit key: pandas joins on the columns both frames share.
    places = pd.merge(places, geocode.read_geocodes(), how='left')

    has_modern_coords = pd.notnull(places.modern_lat)
    places.loc[has_modern_coords, 'disposition'] = 'known'
    places.loc[~has_modern_coords, 'disposition'] = 'unknown'

    # Keyword arguments keep the intent readable and are mandatory on
    # recent pandas releases, where these parameters are keyword-only.
    places = places.set_index('ptol_id', drop=False, verify_integrity=True)
    return places
Exemple #3
0
def read_places(id_starts_with):
    """Read the places whose ``ptol_id`` starts with ``id_starts_with``.

    Rows without a ``ptol_lat`` value are dropped, duplicate ``ptol_id``
    rows are removed, and only the ``KEY_PLACE_FIELDNAMES`` columns are
    kept.  The result is left-merged with ``geocode.read_geocodes()`` and
    gains a ``disposition`` column: ``"known"`` where ``modern_lat`` is
    present, ``"unknown"`` otherwise.

    The row count is printed before and after the merge so that a merge
    which changes the number of rows is visible on stdout.

    Args:
        id_starts_with: prefix passed to ``str.startswith`` on ``ptol_id``.

    Returns:
        A DataFrame indexed by ``ptol_id``; ``ptol_id`` also remains
        available as a regular column.

    Raises:
        ValueError: if ``ptol_id`` is not unique after the merge
            (``verify_integrity=True``).
    """
    places = sgdb.read_places()
    places = places.loc[pd.notnull(places.ptol_lat), :]
    places = places.drop_duplicates("ptol_id")
    places = places.loc[:, KEY_PLACE_FIELDNAMES]
    places = places.loc[places.ptol_id.str.startswith(id_starts_with), :]
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(len(places.ptol_id))
    # No explicit key: pandas joins on the columns both frames share.
    places = pd.merge(places, geocode.read_geocodes(), how="left")

    has_modern_coords = pd.notnull(places.modern_lat)
    places.loc[has_modern_coords, "disposition"] = "known"
    places.loc[~has_modern_coords, "disposition"] = "unknown"

    # Keyword arguments keep the intent readable and are mandatory on
    # recent pandas releases, where these parameters are keyword-only.
    places = places.set_index("ptol_id", drop=False, verify_integrity=True)
    print(len(places.ptol_id))
    return places
    'modern_name']

# Predictor columns used as regression inputs.
X_NAMES = [
    'ptol_lat',
    'ptol_lon']

# book 7 contains India
# chapter 1 is within the Ganges
TARGET_BOOK = '7.01'

# Load the places, keep one row per ptol_id that has a ptol_lat, restrict
# to the key columns and the target book, then attach the geocodes.
places = sgdb.read_places().drop_duplicates('ptol_id')
places = places.loc[pd.notnull(places.ptol_lat), :]
places = places.loc[:, KEY_PLACE_FIELDNAMES]
places = places.loc[places.ptol_id.str.startswith(TARGET_BOOK), :]
places = pd.merge(places, geocode.read_geocodes(), how='left')

# Work on an explicit copy: prediction columns are assigned to `known`
# below, and a copy makes that assignment independent of `places`.
known = places.loc[pd.notnull(places.modern_lat), :].copy()
known.to_csv('../Data/regression_measure_before.csv', encoding='cp1252')

# Leave-one-out: for each held-out row, fit separate linear models for
# modern longitude and latitude on the remaining rows and store the
# held-out predictions in pred_lon / pred_lat.
# NOTE(review): LeaveOneOut is called with the sample count and iterated
# directly; confirm this matches the scikit-learn API imported by the file.
loo = LeaveOneOut(len(known))
for train, test in loo:
    train_rows = known.iloc[train, :]
    test_rows = known.iloc[test, :]
    trainx = train_rows.loc[:, X_NAMES]
    testx = test_rows.loc[:, X_NAMES]
    lonreg = linear_model.LinearRegression()
    latreg = linear_model.LinearRegression()
    lonreg.fit(trainx, train_rows.modern_lon)
    latreg.fit(trainx, train_rows.modern_lat)
    known.loc[test_rows.index, 'pred_lat'] = latreg.predict(testx)
    known.loc[test_rows.index, 'pred_lon'] = lonreg.predict(testx)
Exemple #5
0
# Columns retained from the places table.
KEY_PLACE_FIELDNAMES = [
    'ptol_id', 'ptol_name', 'ptol_lat', 'ptol_lon', 'modern_name'
]

# Predictor columns used as regression inputs.
X_NAMES = ['ptol_lat', 'ptol_lon']

# book 7 contains India
# chapter 1 is within the Ganges
TARGET_BOOK = '7.01'

# Load the places, keep one row per ptol_id that has a ptol_lat, restrict
# to the key columns and the target book, then attach the geocodes.
places = sgdb.read_places().drop_duplicates('ptol_id')
places = places.loc[pd.notnull(places.ptol_lat), :]
places = places.loc[:, KEY_PLACE_FIELDNAMES]
places = places.loc[places.ptol_id.str.startswith(TARGET_BOOK), :]
places = pd.merge(places, geocode.read_geocodes(), how='left')

# Work on an explicit copy: prediction columns are assigned to `known`
# below, and a copy makes that assignment independent of `places`.
known = places.loc[pd.notnull(places.modern_lat), :].copy()
known.to_csv('../Data/regression_measure_before.csv', encoding='cp1252')

# Leave-one-out: for each held-out row, fit separate linear models for
# modern longitude and latitude on the remaining rows and store the
# held-out predictions in pred_lon / pred_lat.
# NOTE(review): LeaveOneOut is called with the sample count and iterated
# directly; confirm this matches the scikit-learn API imported by the file.
loo = LeaveOneOut(len(known))
for train, test in loo:
    train_rows = known.iloc[train, :]
    test_rows = known.iloc[test, :]
    trainx = train_rows.loc[:, X_NAMES]
    testx = test_rows.loc[:, X_NAMES]
    lonreg = linear_model.LinearRegression()
    latreg = linear_model.LinearRegression()
    lonreg.fit(trainx, train_rows.modern_lon)
    latreg.fit(trainx, train_rows.modern_lat)
    known.loc[test_rows.index, 'pred_lat'] = latreg.predict(testx)
    known.loc[test_rows.index, 'pred_lon'] = lonreg.predict(testx)