Example #1
def getTimelineValues():
    global items

    yearCol = "year"
    if yearCol not in items[0]:
        print("Could not find column %s in items, please add this column to metadata cols with 'type' = 'int'" % yearCol)
        sys.exit()

    years = [item[yearCol] for item in items]
    minYear = min(years)
    maxYear = max(years)
    totalYears = maxYear - minYear + 1

    groups = lu.groupList(items, yearCol) # group by year
    yearDataLookup = lu.createLookup(groups, yearCol)

    timelineItems = []
    for i in range(totalYears):
        year = i + minYear
        yearKey = str(year)
        count = 0
        if yearKey in yearDataLookup:
            count = yearDataLookup[yearKey]["count"]
        timelineItems.append({
            "year": year,
            "value": count
        })
    return timelineItems
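
# The function above leans on the project's `lu` helpers; below is a minimal,
# self-contained sketch of the same gap-filling idea using only the standard library.
# The helper name and the toy input are hypothetical, not part of the original code.
from collections import Counter

def timelineValuesSketch(toyItems, yearCol="year"):
    years = [item[yearCol] for item in toyItems]
    countsByYear = Counter(years)
    minYear, maxYear = min(years), max(years)
    # emit one entry per year in the range, including years with zero items
    return [{"year": y, "value": countsByYear.get(y, 0)} for y in range(minYear, maxYear + 1)]

# timelineValuesSketch([{"year": 1990}, {"year": 1990}, {"year": 1993}])
# -> entries for 1990..1993 with values 2, 0, 0, 1
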
    cleanedItems[i]["Acquisition Type"] = acquisitionType.capitalize()
    cleanedItems[i]["Acquisition Type Confidence"] = typeConfidence

# Debugging...
# lu.countValues(cleanedItems, "Acquisition Year")
# lu.countValues(cleanedItems, "Acquisition Type")
# sys.exit()

##############################################
# STEP 2: retrieve country                   #
##############################################

# lu.countValues(items, "Country")

_, countrySynonyms = io.readCsv("data/usergen/CountriesSynonyms.csv")
countrySynonymLookup = lu.createLookup(countrySynonyms, "alt")
_, countryLatLons = io.readCsv("data/vendor/CountriesLatLon.csv")
countryLookup = lu.createLookup(countryLatLons, "name")

for i, item in enumerate(items):
    country = item["Country"].strip()
    countryConfidence = 1.0
    if len(country) < 1:
        continue

    # normalize lists
    country = country.replace("/", ", ")
    country = country.replace(" OR ", ", ")
    if "," in country:
        countryConfidence *= 0.5
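
# Hypothetical continuation sketch (not from the original script): one way the two
# lookups built above might be combined to resolve a country string to its lat/lon row.
# It assumes lu.createLookup returns a plain dict and that CountriesSynonyms.csv has a
# "name" column alongside "alt"; the helper name is also an assumption.
def resolveCountrySketch(rawCountry, synonymLookup, latLonLookup):
    name = rawCountry.strip()
    # map a known alternate spelling to its canonical name
    if name in synonymLookup:
        name = synonymLookup[name]["name"]
    # return the canonical row (expected to carry lat/lon), or None if unrecognized
    return latLonLookup.get(name)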
Example #3
# input
parser = argparse.ArgumentParser()
parser.add_argument("-in", dest="INPUT_FILE", default="data/processed/MexicoAndCentralAmerica_cleaned.csv", help="File generated by clean_data.py")
parser.add_argument("-out", dest="OUTPUT_FILE", default="data/processed/MexicoAndCentralAmerica_geotag.csv", help="Output csv file")
a = parser.parse_args()

# Make sure output dirs exist
io.makeDirectories([a.OUTPUT_FILE])

fieldNames, items = io.readCsv(a.INPUT_FILE)
itemCount = len(items)

# keep only items that have both a locale and a country
items = [item for item in items if len(item["Locale"].strip()) > 0 and len(item["Country"].strip()) > 0]
for i, item in enumerate(items):
    items[i]["LookupString"] = item["Locale"]
# map each lookup string to a single representative item (via the project's createLookup helper)
lookupTable = lu.createLookup(items, "LookupString")

values = [item["LookupString"] for item in items]
counter = collections.Counter(values)
counts = counter.most_common()

rows = []
for value, count in counts:
    if len(str(value).strip()) < 1:
        continue
    row = {}
    row["geoname"] = value

    item = lookupTable[value]
    row["id"] = item["Id"]
    row["country"] = item["Country"]
Example #4
    "items": timelineValues
}
# Category key
keys["categories"] = {
    "type": "legend",
    "parent": "#legend-container",
    "items": categories
}
yearRange = [timelineValues[0]["year"], timelineValues[-1]["year"]]
outjson["keys"] = keys

# Generate content/Stories
validStories = {}
if "stories" in config:

    itemLookup = lu.createLookup(items, "_id")

    stories = config["stories"]
    for key, story in stories.items():

        # determine which item to highlight for this story
        hotspotItemIndex = -1
        if "itemId" in story and story["itemId"] in itemLookup:
            hotspotItemIndex = itemLookup[story["itemId"]]["index"]

        else:
            # retrieve the story-specific items
            storyItems = lu.filterByQueryString(items, story["query"]) if "query" in story else items[:]
            # limit the results if specified
            if "limit" in story and len(storyItems) > story["limit"]:
                storyItems = storyItems[:story["limit"]]
Example #5
def getSphereCategoryTimelineLayout(userOptions={}):
    global items
    global sets
    cfg = {"layout": "spheres"}

    categoryCol = "category"
    yearCol = "year"
    if yearCol not in items[0]:
        print("Could not find column %s in items, please add this column to metadata cols with 'type' = 'int'" % yearCol)
        sys.exit()
    if categoryCol not in sets:
        print("Could not find column %s in sets, please add this column to metadata cols with 'asIndex' = true" % categoryCol)
        sys.exit()

    categorySet = sets[categoryCol]
    categoryCount = len(categorySet)
    dimensions = 3
    groups = lu.groupList(items, yearCol)  # group by year
    groups = sorted(groups, key=lambda group: group[yearCol])
    years = [item[yearCol] for item in items]
    minYear = min(years)
    maxYear = max(years) + 1
    nUnit = 1.0 / (maxYear - minYear)

    # determine category sphere count range
    minCount = 9999999999
    maxCount = 0
    for i, group in enumerate(groups):
        subgroups = lu.groupList(group["items"],
                                 categoryCol)  # group by category
        for subgroup in subgroups:
            minCount = min(minCount, subgroup["count"])
            maxCount = max(maxCount, subgroup["count"])
        groups[i]["categoryGroups"] = subgroups

    # assign position values
    values = np.zeros(len(items) * dimensions)
    for i, group in enumerate(groups):
        z = mu.norm(group[yearCol], (minYear, maxYear)) + nUnit * 0.5  # place spheres in the center of the year
        subgroups = group["categoryGroups"]
        subgroupLookup = lu.createLookup(subgroups, categoryCol)
        for j, category in enumerate(categorySet):
            x = 1.0 - 1.0 * j / (categoryCount - 1)
            categoryKey = str(j)
            if categoryKey in subgroupLookup:
                subgroup = subgroupLookup[categoryKey]
                y = mu.norm(subgroup["count"], (minCount, maxCount))
                y = mu.lerp((0.01, 1.0), y)
                for catItem in subgroup["items"]:
                    itemIndex = catItem["index"]
                    values[itemIndex * dimensions] = round(x, PRECISION)
                    values[itemIndex * dimensions + 1] = round(y, PRECISION)
                    values[itemIndex * dimensions + 2] = round(z, PRECISION)

    values = values.tolist()
    return (cfg, values)
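# The layout math above relies on the project's `mu` helpers. As used here they appear
# to behave like the normalize/interpolate pair sketched below; treat these signatures
# as assumptions inferred from the call sites, not the project's actual implementation.
def normSketch(value, bounds):
    a, b = bounds
    # map value from [a, b] onto [0, 1]
    return (value - a) / float(b - a)

def lerpSketch(bounds, t):
    a, b = bounds
    # map t from [0, 1] back into [a, b]
    return a + (b - a) * t
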
def getSphereCategoryTimelineLayout(userOptions={}):
    global items
    global categories
    cfg = {
        "layout": "spheres"
    }

    categoryCol = "category"
    yearCol = "year"
    if yearCol not in items[0]:
        print("`dateColumn` needs to be set in config yml to support timelineTracks layout")
        return (False, False)

    if categoryCol not in items[0]:
        print("`groupByColumn` needs to be set in config yml to support timelineTracks layout")
        return (False, False)

    categoryCount = len(categories)
    dimensions = 3
    groups = lu.groupList(items, yearCol) # group by year
    groups = sorted(groups, key=lambda group: group[yearCol])
    years = [item[yearCol] for item in items]
    minYear = min(years)
    maxYear = max(years) + 1
    nUnit = 1.0 / (maxYear-minYear)

    # determine category sphere count range
    minCount = 9999999999
    maxCount = 0
    for i, group in enumerate(groups):
        subgroups = lu.groupList(group["items"], categoryCol) # group by category
        for subgroup in subgroups:
            minCount = min(minCount, subgroup["count"])
            maxCount = max(maxCount, subgroup["count"])
        groups[i]["categoryGroups"] = subgroups

    # assign position values
    values = np.zeros(len(items) * dimensions)
    for i, group in enumerate(groups):
        z = mu.norm(group[yearCol], (minYear, maxYear)) + nUnit*0.5 # place spheres in the center of the year
        subgroups = group["categoryGroups"]
        subgroupLookup = lu.createLookup(subgroups, categoryCol)
        for j, category in enumerate(categories):
            x = 1.0 - 1.0 * j / (categoryCount-1)
            categoryKey = category["text"]
            if categoryKey in subgroupLookup:
                subgroup = subgroupLookup[categoryKey]
                y = mu.norm(subgroup["count"], (minCount, maxCount))
                y = mu.lerp((0.01, 1.0), y)
                for catItem in subgroup["items"]:
                    itemIndex = catItem["index"]
                    cy = y
                    # a bit of a hack to ensure highlighted items are visible
                    if itemHasStory(catItem):
                        cy = y + 1.25
                    values[itemIndex*dimensions] = round(x, PRECISION)
                    values[itemIndex*dimensions+1] = round(cy, PRECISION)
                    values[itemIndex*dimensions+2] = round(z, PRECISION)

    values = values.tolist()

    return (cfg, values)
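# Hypothetical usage sketch: this variant returns (False, False) when a required column
# is missing, so a caller is assumed to check that sentinel before using the flat
# position array (three coordinates per item: x, y, z).
cfg, positionValues = getSphereCategoryTimelineLayout()
if cfg is not False:
    positionCount = len(positionValues) // 3  # one (x, y, z) triple per item
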
# pprint(xLabels)
# pprint(yLabels)

itemCount = len(items)
for i, item in enumerate(items):
    items[i]["index"] = i

# First group by X
groupedItems = lu.groupList(items, a.X_AXIS)

# Then by Y
maxGroupItemCount = 0
for i, xgroup in enumerate(groupedItems):
    ygroups = lu.groupList(xgroup["items"], a.Y_AXIS)
    maxGroupItemCount = max(maxGroupItemCount, max(group["count"] for group in ygroups))
    groupedItems[i]["items"] = lu.createLookup(ygroups, a.Y_AXIS)

# Debug with matplotlib
if a.PLOT:
    import matplotlib.pyplot as plt
    yGroups = [[0]*xLabelCount for ygroup in yLabels]
    for i, xgroup in enumerate(groupedItems):
        for j, yLabel in enumerate(yLabels):
            count = 0
            if yLabel in xgroup["items"]:
                count = len(xgroup["items"][yLabel]["items"])
            yGroups[j][i] = count
    bars = []
    for i, yGroup in enumerate(yGroups):
        bar = None
        if i > 0:
itemCount = len(items)

for i, item in enumerate(items):
    items[i]["LookupString"] = ""

    # only lookup items that have country and locale with values
    if len(item["Country"]) < 1 or len(item[LOCALE_FIELD]) < 1:
        continue

    items[i]["LookupString"] = item[LOCALE_FIELD] + ", " + item["Country"]

locales = []
localeLookup = {}
if os.path.isfile(a.CACHE_FILE):
    _, locales = io.readCsv(a.CACHE_FILE)
    localeLookup = lu.createLookup(locales, "LookupString")

if "GeoName" not in fieldNames:
    fieldNames.append("GeoName")

values = [item["LookupString"] for item in items]
counter = collections.Counter(values)
counts = counter.most_common()
total = len(counts)

geolocator = Nominatim(user_agent=a.USER_AGENT_STRING)

for i, c in enumerate(counts):
    value, count = c

    if len(str(value).strip()) < 1: