def getTimelineValues():
    """Build a gap-free series of item counts per year for the timeline.

    Reads the module-level ``items`` list, finds the earliest and latest
    value of the "year" column, and emits one entry for every year in that
    inclusive range. Years that have no items are still emitted, with a
    value of 0, so the series has no gaps.

    Returns:
        list of dicts shaped ``{"year": <year>, "value": <count>}``, ordered
        from the earliest year to the latest.

    Raises:
        SystemExit: with status 1 when the first item has no "year" column.
    """
    yearCol = "year"
    if yearCol not in items[0]:
        print("Could not find column %s in items, please add this column to metadata cols with 'type' = 'int'" % yearCol)
        # A missing column is a fatal configuration error; exit non-zero so
        # that shells and build scripts do not mistake it for success.
        sys.exit(1)

    years = [item[yearCol] for item in items]
    minYear = min(years)
    maxYear = max(years)

    # Group by year; each group exposes its size under "count".
    groups = lu.groupList(items, yearCol)
    # The lookup is queried with the stringified year below.
    yearDataLookup = lu.createLookup(groups, yearCol)

    timelineItems = []
    for year in range(minYear, maxYear + 1):
        yearKey = str(year)
        count = yearDataLookup[yearKey]["count"] if yearKey in yearDataLookup else 0
        timelineItems.append({
            "year": year,
            "value": count
        })
    return timelineItems
cleanedItems[i]["Acquisition Type"] = acquisitionType.capitalize() cleanedItems[i]["Acquisition Type Confidence"] = typeConfidence # Debugging... # lu.countValues(cleanedItems, "Acquisition Year") # lu.countValues(cleanedItems, "Acquisition Type") # sys.exit() ############################################## # STEP 2: retrieve country # ############################################## # lu.countValues(items, "Country") _, countrySynonyms = io.readCsv("data/usergen/CountriesSynonyms.csv") countrySynonymLookup = lu.createLookup(countrySynonyms, "alt") _, countryLatLons = io.readCsv("data/vendor/CountriesLatLon.csv") countryLookup = lu.createLookup(countryLatLons, "name") for i, item in enumerate(items): country = item["Country"].strip() countryConfidence = 1.0 if len(country) < 1: continue # normalize lists country = country.replace("/", ", ") country = country.replace(" OR ", ", ") if "," in country: countryConfidence *= 0.5
# input parser = argparse.ArgumentParser() parser.add_argument("-in", dest="INPUT_FILE", default="data/processed/MexicoAndCentralAmerica_cleaned.csv", help="File generated by clean_data.py") parser.add_argument("-out", dest="OUTPUT_FILE", default="data/processed/MexicoAndCentralAmerica_geotag.csv", help="Output csv file") a = parser.parse_args() # Make sure output dirs exist io.makeDirectories([a.OUTPUT_FILE]) fieldNames, items = io.readCsv(a.INPUT_FILE) itemCount = len(items) items = [item for item in items if len(item["Locale"].strip()) > 0 and len(item["Country"].strip()) > 0] for i, item in enumerate(items): items[i]["LookupString"] = item["Locale"] lookupTable = lu.createLookup(items, "LookupString") values = [item["LookupString"] for item in items] counter = collections.Counter(values) counts = counter.most_common() rows = [] for value, count in counts: if len(str(value).strip()) < 1: continue row = {} row["geoname"] = value item = lookupTable[value] row["id"] = item["Id"] row["country"] = item["Country"]
"items": timelineValues } # Category key keys["categories"] = { "type": "legend", "parent": "#legend-container", "items": categories } yearRange = [timelineValues[0]["year"], timelineValues[-1]["year"]] outjson["keys"] = keys # Generate content/Stories validStories = {} if "stories" in config: itemLookup = lu.createLookup(items, "_id") stories = config["stories"] for key, story in stories.items(): # determine which item to highlight for this story hotspotItemIndex = -1 if "itemId" in story and story["itemId"] in itemLookup: hotspotItemIndex = itemLookup[story["itemId"]]["index"] else: # retrieve the story-specific items storyItems = lu.filterByQueryString(items, story["query"]) if "query" in story else items[:] # limit the results if specified if "limit" in story and len(storyItems) > story["limit"]: storyItems = storyItems[:story["limit"]]
def getSphereCategoryTimelineLayout(userOptions=None):
    """Compute positions for the "spheres" layout (category by year).

    Every item is positioned according to the (year, category) bucket it
    belongs to, so all items in one bucket share the same coordinates:

    * x -- category index spread linearly from 1.0 (index 0) down to 0.0
      (last index).
    * y -- the bucket's item count passed through ``mu.norm`` over the
      smallest/largest bucket count and then ``mu.lerp((0.01, 1.0), ...)``
      (presumably mapping the count into the 0.01..1.0 range -- confirm
      against the ``mu`` helpers).
    * z -- the year passed through ``mu.norm`` over (minYear, maxYear + 1),
      shifted by half a year unit so spheres sit in the center of the year.

    Reads the module-level ``items``, ``sets`` and ``PRECISION``.

    Args:
        userOptions: not read by this function. Defaults to None rather than
            a shared mutable dict.

    Returns:
        tuple ``(cfg, values)`` where ``cfg`` is ``{"layout": "spheres"}`` and
        ``values`` is a flat list ``[x0, y0, z0, x1, y1, z1, ...]`` addressed
        by each item's "index". Slots that are never assigned stay 0.0.

    Raises:
        SystemExit: with status 1 when the first item has no "year" column or
            ``sets`` has no "category" entry.
    """
    cfg = {"layout": "spheres"}
    categoryCol = "category"
    yearCol = "year"
    dimensions = 3

    # Both checks are fatal configuration errors, so exit with a non-zero
    # status instead of the default 0.
    if yearCol not in items[0]:
        print(
            "Could not find column %s in items, please add this column to metadata cols with 'type' = 'int'" % yearCol)
        sys.exit(1)
    if categoryCol not in sets:
        print(
            "Could not find column %s in sets, please add this column to metadata cols with 'asIndex' = true" % categoryCol)
        sys.exit(1)

    categoryCount = len(sets[categoryCol])
    # With a single category (categoryCount - 1) is 0; clamp the divisor so
    # the lone category lands at x = 1.0 instead of raising ZeroDivisionError.
    xDivisor = max(categoryCount - 1, 1)

    # group by year, ordered chronologically
    groups = sorted(lu.groupList(items, yearCol), key=lambda group: group[yearCol])
    years = [item[yearCol] for item in items]
    minYear = min(years)
    maxYear = max(years) + 1
    nUnit = 1.0 / (maxYear - minYear)

    # determine category sphere count range
    minCount = 9999999999
    maxCount = 0
    for group in groups:
        subgroups = lu.groupList(group["items"], categoryCol)  # group by category
        for subgroup in subgroups:
            minCount = min(minCount, subgroup["count"])
            maxCount = max(maxCount, subgroup["count"])
        # keep the per-category groups so the second pass can reuse them
        group["categoryGroups"] = subgroups

    # assign position values
    values = np.zeros(len(items) * dimensions)
    for group in groups:
        # place spheres in the center of the year
        z = round(mu.norm(group[yearCol], (minYear, maxYear)) + nUnit * 0.5, PRECISION)
        subgroupLookup = lu.createLookup(group["categoryGroups"], categoryCol)
        for j in range(categoryCount):
            # categories are looked up by their stringified index
            categoryKey = str(j)
            if categoryKey not in subgroupLookup:
                continue
            subgroup = subgroupLookup[categoryKey]
            x = round(1.0 - 1.0 * j / xDivisor, PRECISION)
            y = mu.norm(subgroup["count"], (minCount, maxCount))
            y = round(mu.lerp((0.01, 1.0), y), PRECISION)
            for catItem in subgroup["items"]:
                offset = catItem["index"] * dimensions
                values[offset] = x
                values[offset + 1] = y
                values[offset + 2] = z

    return (cfg, values.tolist())
def getSphereCategoryTimelineLayout(userOptions=None):
    """Compute positions for the "spheres" layout (category by year).

    Every item is positioned according to the (year, category) bucket it
    belongs to:

    * x -- position of the category within ``categories``, spread linearly
      from 1.0 (first) down to 0.0 (last).
    * y -- the bucket's item count passed through ``mu.norm`` over the
      smallest/largest bucket count and then ``mu.lerp((0.01, 1.0), ...)``
      (presumably mapping the count into the 0.01..1.0 range -- confirm
      against the ``mu`` helpers). Items for which ``itemHasStory`` is truthy
      are raised by a further 1.25.
    * z -- the year passed through ``mu.norm`` over (minYear, maxYear + 1),
      shifted by half a year unit so spheres sit in the center of the year.

    Reads the module-level ``items``, ``categories`` and ``PRECISION``.

    Args:
        userOptions: not read by this function. Defaults to None rather than
            a shared mutable dict.

    Returns:
        tuple ``(cfg, values)`` where ``cfg`` is ``{"layout": "spheres"}`` and
        ``values`` is a flat list ``[x0, y0, z0, x1, y1, z1, ...]`` addressed
        by each item's "index"; slots that are never assigned stay 0.0.
        Returns ``(False, False)`` when the first item lacks the "year" or
        "category" column.
    """
    cfg = {"layout": "spheres"}
    categoryCol = "category"
    yearCol = "year"
    dimensions = 3

    firstItem = items[0]
    if yearCol not in firstItem:
        print("`dateColumn` needs to be set in config yml to support timelineTracks layout")
        return (False, False)
    if categoryCol not in firstItem:
        print("`groupByColumn` needs to be set in config yml to support timelineTracks layout")
        return (False, False)

    categoryCount = len(categories)
    # With a single category (categoryCount - 1) is 0; clamp the divisor so
    # the lone category lands at x = 1.0 instead of raising ZeroDivisionError.
    xDivisor = max(categoryCount - 1, 1)

    # group by year, ordered chronologically
    groups = sorted(lu.groupList(items, yearCol), key=lambda group: group[yearCol])
    years = [item[yearCol] for item in items]
    minYear = min(years)
    maxYear = max(years) + 1
    nUnit = 1.0 / (maxYear - minYear)

    # determine category sphere count range
    minCount = 9999999999
    maxCount = 0
    for group in groups:
        subgroups = lu.groupList(group["items"], categoryCol)  # group by category
        for subgroup in subgroups:
            minCount = min(minCount, subgroup["count"])
            maxCount = max(maxCount, subgroup["count"])
        # keep the per-category groups so the second pass can reuse them
        group["categoryGroups"] = subgroups

    # assign position values
    values = np.zeros(len(items) * dimensions)
    for group in groups:
        # place spheres in the center of the year
        z = round(mu.norm(group[yearCol], (minYear, maxYear)) + nUnit * 0.5, PRECISION)
        subgroupLookup = lu.createLookup(group["categoryGroups"], categoryCol)
        for j, category in enumerate(categories):
            # buckets are looked up by the category's display text
            categoryKey = category["text"]
            if categoryKey not in subgroupLookup:
                continue
            subgroup = subgroupLookup[categoryKey]
            x = round(1.0 - 1.0 * j / xDivisor, PRECISION)
            y = mu.norm(subgroup["count"], (minCount, maxCount))
            y = mu.lerp((0.01, 1.0), y)
            for catItem in subgroup["items"]:
                # a bit of a hack to ensure highlighted items are visible:
                # items with a story are lifted above the rest of the bucket
                cy = y + 1.25 if itemHasStory(catItem) else y
                offset = catItem["index"] * dimensions
                values[offset] = x
                values[offset + 1] = round(cy, PRECISION)
                values[offset + 2] = z

    return (cfg, values.tolist())
# pprint(xLabels) # pprint(yLabels) itemCount = len(items) for i, item in enumerate(items): items[i]["index"] = i # First group by X groupedItems = lu.groupList(items, a.X_AXIS) # Then by Y maxGroupItemCount = 0 for i, xgroup in enumerate(groupedItems): ygroups = lu.groupList(xgroup["items"], a.Y_AXIS) maxGroupItemCount = max(maxGroupItemCount, max(group["count"] for group in ygroups)) groupedItems[i]["items"] = lu.createLookup(ygroups, a.Y_AXIS) # Debug with matplotlib if a.PLOT: import matplotlib.pyplot as plt yGroups = [[0]*xLabelCount for ygroup in yLabels] for i, xgroup in enumerate(groupedItems): for j, yLabel in enumerate(yLabels): count = 0 if yLabel in xgroup["items"]: count = len(xgroup["items"][yLabel]["items"]) yGroups[j][i] = count bars = [] for i, yGroup in enumerate(yGroups): bar = None if i > 0:
itemCount = len(items) for i, item in enumerate(items): items[i]["LookupString"] = "" # only lookup items that have country and locale with values if len(item["Country"]) < 1 or len(item[LOCALE_FIELD]) < 1: continue items[i]["LookupString"] = item[LOCALE_FIELD] + ", " + item["Country"] locales = [] localeLookup = {} if os.path.isfile(a.CACHE_FILE): _, locales = io.readCsv(a.CACHE_FILE) localeLookup = lu.createLookup(locales, "LookupString") if "GeoName" not in fieldNames: fieldNames.append("GeoName") values = [item["LookupString"] for item in items] counter = collections.Counter(values) counts = counter.most_common() total = len(counts) geolocator = Nominatim(user_agent=a.USER_AGENT_STRING) for i, c in enumerate(counts): value, count = c if len(str(value).strip()) < 1: