Example 1
# input
parser = argparse.ArgumentParser()
parser.add_argument("-config", dest="CONFIG_FILE", default="config-sample.json", help="Config file")
a = parser.parse_args()

config = io.readJSON(a.CONFIG_FILE)
configContent = config["content"]

OUTPUT_DIR = "apps/{appname}/".format(appname=config["name"])
OUTPUT_SET_DIR_REL = "data/sets/"
OUTPUT_SET_DIR = OUTPUT_DIR + OUTPUT_SET_DIR_REL
CONFIG_FILE = OUTPUT_DIR + "js/config/config.sets.js"

# Make sure output dirs exist
io.makeDirectories([OUTPUT_SET_DIR, CONFIG_FILE])
sets, items = tu.getItems(config)

# Remove existing data
io.removeFiles(OUTPUT_SET_DIR + "*.json")

jsonsets = {}
for keyName, options in configContent.items():

    if "query" not in options:
        continue

    setItems = lu.filterByQueryString(items, options["query"])
    if len(setItems) > 0:
        print("%s results found for '%s'" % (len(setItems), options["query"]))
    else:
        # completion assumed; the snippet is truncated at this point
        print("Warning: no results found for '%s'" % options["query"])
        continue
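These examples lean on a small personal helper library (imported as io, lu, tu, etc.) that is not shown on this page. As a rough sketch, io.makeDirectories presumably accepts a single path or a list of paths and creates the parent directory of each, something like:

import os

def makeDirectories(filenames):
    # Accept a single path or a list of paths
    if not isinstance(filenames, list):
        filenames = [filenames]
    for filename in filenames:
        dirname = os.path.dirname(filename)
        # Create the parent directory if there is one and it doesn't exist yet
        if len(dirname) > 0 and not os.path.exists(dirname):
            os.makedirs(dirname)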
                    dest="OUTPUT_FILE",
                    default="reports/MexicoAndCentralAmerica.txt",
                    help="Output text file")
parser.add_argument('-detail',
                    dest="DETAILED_OUTPUT_FILE",
                    default="",
                    help="Output each field as an individual csv file")
a = parser.parse_args()

FIELDS_HAS = a.FIELDS_HAS.strip().split(",")
FIELDS_COUNTS = a.FIELDS_COUNTS.strip().split(",")
FIELDS_LISTS = a.FIELDS_LISTS.strip().split(",")
FIELDS_MERGE = io.parseQueryString(a.FIELDS_MERGE.strip())

# Make sure output dirs exist
io.makeDirectories([a.OUTPUT_FILE])

if len(a.DETAILED_OUTPUT_FILE) > 0:
    io.makeDirectories([a.DETAILED_OUTPUT_FILE])

items = []
fieldNames = []

if "*" in a.INPUT_FILE:
    files = glob.glob(a.INPUT_FILE)
    for fn in files:
        fFieldNames, fItems = io.readCsv(fn)
        fieldNames += fFieldNames
        items += fItems
    fieldNames = lu.unique(fieldNames)
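io.readCsv and lu.unique are also part of that helper library; plausible minimal stand-ins (the real helpers may additionally cast types or normalize field names):

import csv

def readCsv(filename):
    # Return (fieldNames, rows), where each row is a dict keyed by field name
    with open(filename, "r", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        rows = [dict(row) for row in reader]
        fieldNames = list(reader.fieldnames)
    return fieldNames, rows

def unique(arr):
    # De-duplicate while preserving first-seen order
    return list(dict.fromkeys(arr))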
Example 3
                    dest="INPUT_FILE",
                    default="data/MexicoAndCentralAmerica.csv",
                    help="File generated by html_to_csv.py")
parser.add_argument('-field',
                    dest="FIELD",
                    default="Locale",
                    help="Field to output")
parser.add_argument(
    '-out',
    dest="OUTPUT_FILE",
    default="data/processed/MexicoAndCentralAmerica_locales.csv",
    help="Output csv file")
a = parser.parse_args()

# Make sure output dirs exist
io.makeDirectories([a.OUTPUT_FILE])

fieldNames, items = io.readCsv(a.INPUT_FILE)
itemCount = len(items)

values = [item[a.FIELD] for item in items]
counter = collections.Counter(values)
counts = counter.most_common()

rows = []
for value, count in counts:
    if len(str(value).strip()) < 1:
        continue
    row = {}
    row[a.FIELD] = value
    row["Count"] = count
Example 4
maxCellWidth = 512
containsAlpha = config["imageHasAlpha"] if "imageHasAlpha" in config else False
defaultColor = config["defaultColor"] if "defaultColor" in config else "#3C3C3C"
filenameKey = config["filenameColumn"] if "filenameColumn" in config else "filename"
imageDir = config["imageDirectory"]
noImageValue = config["noImageValue"] if "noImageValue" in config else None

OUTPUT_DIR = "apps/{appname}/".format(appname=config["name"])
OUTPUT_TEXTURES_DIR_REL = "img/textures/"
OUTPUT_TEXTURES_DIR = OUTPUT_DIR + OUTPUT_TEXTURES_DIR_REL
CONFIG_FILE = OUTPUT_DIR + "js/config/config.textures.js"

if not a.PROBE:
    # Make sure output dirs exist
    io.makeDirectories([OUTPUT_TEXTURES_DIR, CONFIG_FILE, a.CACHE_DIR])

    # Remove existing images
    io.removeFiles(OUTPUT_TEXTURES_DIR + "*.jpg")

items, categories = tu.getItems(config)

# Make texture for each set
# sets = list(configSets.items())
sets = []  # just produce the default set for now
sets = [("default", {"query": ""})] + sets  # add default set
jsonsets = {}
for keyName, options in sets:
    setItems = lu.filterByQueryString(items, options["query"])
    if len(setItems) > 0:
        print("%s results found for '%s'" % (len(setItems), options["query"]))
    "Cache-Control": "max-age=0",
    "Connection": "keep-alive",
    # "Content-Length": "123",
    "Content-Type": "application/x-www-form-urlencoded",
    "Host": "anthro.amnh.org",
    "Origin": "https://anthro.amnh.org",
    "Referer": "https://anthro.amnh.org/anthropology/databases/common/query_categories.cfm?",
    "Sec-Fetch-Mode": "nested-navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "******",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36"
}

# Make sure output dirs exist
io.makeDirectories([a.HTML_DIR])

if a.OVERWRITE:
    io.removeFiles(a.HTML_DIR % '*')

page = 1
zeroPadding = 4
perPage = query["rec_per_page"]
totalPages = None
totalRecords = None
currentRecord = None
prevPageQuery = None
while True:

    postData = query.copy() if prevPageQuery is None else prevPageQuery
    filename = a.HTML_DIR % str(page).zfill(zeroPadding)
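The body of this while loop is truncated. A sketch of how each page might be fetched and saved, assuming the requests library and taking the URL from the Referer header above; parsing totalPages and the next page's form data out of the HTML is left as a placeholder:

import os
import requests

URL = "https://anthro.amnh.org/anthropology/databases/common/query_categories.cfm"

while True:
    postData = query.copy() if prevPageQuery is None else prevPageQuery
    filename = a.HTML_DIR % str(page).zfill(zeroPadding)
    if a.OVERWRITE or not os.path.isfile(filename):
        response = requests.post(URL, data=postData, headers=headers)
        with open(filename, "w", encoding="utf-8") as f:
            f.write(response.text)
    # the real script would parse totalPages / prevPageQuery from the response here
    if totalPages is None:
        break  # placeholder guard so the sketch doesn't loop forever
    page += 1
    if page > totalPages:
        break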
Example 6
                    dest="MAX_SUBJECTS",
                    default=64,
                    type=int,
                    help="Max number of subjects (includes 'other')")
parser.add_argument('-out',
                    dest="OUTPUT_FILE",
                    default="data/photographic_images.json",
                    help="File for output")
a = parser.parse_args()

YEAR_RANGE = [1600, 2020]

gridW, gridH = tuple([int(t) for t in a.GRID_SIZE.split("x")])

# Make sure output dirs exist
io.makeDirectories(a.OUTPUT_FILE)

# retrieve data
fieldNames, data = io.readCsv(a.INPUT_FILE)
dataCount = len(data)
_, subjectData = io.readCsv(a.SUBJECTS_FILE)
grid = np.loadtxt(a.GRID_FILE, delimiter=",")
imageFiles = glob.glob(a.IMAGE_FILES)
imageFiles = sorted(imageFiles)
fileCount = len(imageFiles)
print("Loaded %s files" % fileCount)

# process subject data
subjectData = groupList(subjectData, "subject", sort=True)
subjectCount = len(subjectData)
mainSubjects = subjectData[:a.MAX_SUBJECTS]
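groupList is another library helper; judging by how its result is consumed here and in Example 10 (entries carrying the group value, an "items" list, and a count), a minimal version might look like:

def groupList(rows, groupBy, sort=False):
    groups = {}
    for row in rows:
        groups.setdefault(row[groupBy], []).append(row)
    result = [{groupBy: value, "items": groupItems, "count": len(groupItems)}
              for value, groupItems in groups.items()]
    if sort:
        # Largest groups first
        result = sorted(result, key=lambda g: g["count"], reverse=True)
    return result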
Example 7
import argparse

import lib.io_utils as io  # module path assumed, matching the imports below
import lib.list_utils as lu
import lib.math_utils as mu

# input
parser = argparse.ArgumentParser()
parser.add_argument('-in',
                    dest="INPUT_FILE",
                    default="data/MexicoAndCentralAmerica.csv",
                    help="File generated by html_to_csv.py")
parser.add_argument('-out',
                    dest="OUTPUT_DIR",
                    default="images/MexicoAndCentralAmerica/",
                    help="Output dir")
parser.add_argument('-overwrite',
                    dest="OVERWRITE",
                    action="store_true",
                    help="Overwrite existing data?")
a = parser.parse_args()

# Make sure output dirs exist
io.makeDirectories([a.OUTPUT_DIR])

fieldNames, items = io.readCsv(a.INPUT_FILE)
itemCount = len(items)

for i, item in enumerate(items):
    imgUrl = item['Thumb URL']
    if len(imgUrl) > 0:
        io.downloadBinaryFile(imgUrl, a.OUTPUT_DIR, overwrite=a.OVERWRITE)
    print("%s%%" % round((i + 1) / len(items) * 100.0, 2))
Example 8
a = parser.parse_args()

config = tu.loadConfig(a.CONFIG_FILE)
itemFields = config["itemFields"] if "itemFields" in config else None

if itemFields is None:
    print('No item metadata fields are set in config. Skipping...')
    sys.exit()

OUTPUT_DIR = f'apps/{config["name"]}/'
OUTPUT_FILE_REL = "data/metadata/"
OUTPUT_FILE = OUTPUT_DIR + OUTPUT_FILE_REL
CONFIG_FILE = OUTPUT_DIR + "js/config/config.metadata.js"

# Make sure output dirs exist
io.makeDirectories([OUTPUT_FILE, CONFIG_FILE])
io.removeFiles(OUTPUT_FILE + "*.json")

items, categories = tu.getItems(config)
itemCount = len(items)

# Add item slides from stories to item metadata
storyItems = {}
if "stories" in config:
    stories = config["stories"]
    for key, story in stories.items():
        for slide in story["slides"]:
            if "itemId" in slide:
                slideMeta = slide.copy()
                slideMeta.pop("itemId", None)
                storyItems[str(slide["itemId"])] = slideMeta
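The snippet ends before storyItems is used; presumably the slide metadata is folded into the matching items before the per-item JSON files are written. A sketch of that step, where the "id" field name is an assumption:

for item in items:
    itemId = str(item["id"])  # field name assumed; not shown in this snippet
    if itemId in storyItems:
        item.update(storyItems[itemId])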
Example 9
    "countries": {
        "fontSize": config["labels"]["countryFontSize"],
        "thickness": config["labels"]["countryFontThickness"],
        "defaultView": "geographyBars",
        "layout": "bars"
    }
}

PRECISION = 5
OUTPUT_DIR = "apps/{appname}/".format(appname=config["name"])
OUTPUT_LABEL_DIR_REL = "data/labels/"
OUTPUT_LABEL_DIR = OUTPUT_DIR + OUTPUT_LABEL_DIR_REL
CONFIG_FILE = OUTPUT_DIR + "js/config/config.labels.js"

# Make sure output dirs exist
io.makeDirectories([OUTPUT_LABEL_DIR, CONFIG_FILE])

# Remove existing data
io.removeFiles(OUTPUT_LABEL_DIR + "*.json")

items, categories = tu.getItems(config)
itemCount = len(items)


def getYearLabels(userOptions={}):
    global items
    cfg = {}
    options = {"y": 0.667}
    options.update(userOptions)
    yearCol = "year"
    if yearCol not in items[0]:
        return None  # completion assumed; the snippet is truncated at this point
Example 10
parser.add_argument('-img', dest="IMAGE_FILE", default="images/{Region}/{Filename}", help="Input image file pattern")
parser.add_argument('-cache', dest="CACHE_FILE", default="tmp/imageCache_16.p.gz", help="Image cache file")
parser.add_argument('-out', dest="OUTPUT_FILE", default="data/viz/timeline%s.jpg", help="Output image filename")
parser.add_argument('-count', dest="IMAGE_COUNT", default=4, type=int, help="Number of images to produce")
parser.add_argument('-height', dest="IMAGE_HEIGHT", default=2160, type=int, help="Target height")
parser.add_argument('-ywidth', dest="YEAR_WIDTH", default=100, type=int, help="Target width per year")
parser.add_argument('-gwidth', dest="ITEM_GROUP_WIDTH", default=4.0, type=float, help="Max width of an item group as a percentage of min(year width, region height)")
parser.add_argument('-plot', dest="PLOT", action="store_true", help="Plot the data?")
parser.add_argument('-probe', dest="PROBE", action="store_true", help="Just output details?")
a = parser.parse_args()

COLORS = ["#612323", "#204f1c", "#4d1e59", "#112e6b", "#4b5713", "#571330"]
colorCount = len(COLORS)

# Make sure output dirs exist
io.makeDirectories([a.OUTPUT_FILE, a.CACHE_FILE])
font = ImageFont.truetype(font="fonts/Open_Sans/OpenSans-Regular.ttf", size=a.FONT_SIZE)

print("Reading data...")
fieldNames, items = io.readCsv(a.INPUT_FILE)

yLabels = lu.unique([item[a.Y_AXIS] for item in items])
if a.Y_AXIS == "Region":
    items = [item for item in items if item["Region"] != "Europe"]
    itemsByRegion = lu.groupList(items, "Region")
    for i, region in enumerate(itemsByRegion):
        itemsByRegion[i]["lat"] = np.mean([item["Latitude"] for item in region["items"] if -90 <= item["Latitude"] <= 90])
    itemsByRegion = sorted(itemsByRegion, key=lambda region: -region["lat"])
    yLabels = [region["Region"] for region in itemsByRegion]
else:
    yLabels = sorted(yLabels)
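One caveat in the Region branch above: the Latitude comparison assumes numeric values. If the CSV reader hands back strings (the author's io.readCsv may well cast values already; that isn't shown here), a cast is needed first:

for item in items:
    try:
        item["Latitude"] = float(item["Latitude"])
    except (ValueError, TypeError):
        item["Latitude"] = None
items = [item for item in items if item["Latitude"] is not None]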