# NOTE(review): this line is a whitespace-collapsed fragment. Its original line
# breaks and indentation are not recoverable from here, and it starts inside a
# construct (presumably a loop over `entry` records -- confirm) whose header is
# outside this view.
#
# What the visible statements do, in order:
#   * append `cloudCoverEntry` to `cloudCoverData`;
#   * record in `type` / `intens` whether `entry` contains the keys
#     "precipType" and "precipIntensity";
#   * if both keys are present, append a record (latitude, longitude,
#     precipitationRate, precipitationType, timestamp, empty "children" list)
#     to `precipitationData`;
#   * if only "precipIntensity" is present, append the same record with
#     precipitationType set to the literal "NoData";
#   * set timeResolution to 1 * 60 * 60 (3600) and wrap the temperature, wind,
#     cloud-cover and precipitation lists each in a
#     {'meta': {...}, 'root': {'children': <list>}} dictionary;
#   * pass each dictionary to d.write_json_file with a .kJson path under
#     test-data/.
#
# No visible branch handles an entry that lacks "precipIntensity", so such
# entries add nothing to `precipitationData`.
# NOTE(review): the local flag `type` shadows the builtin of the same name.
cloudCoverData.append(cloudCoverEntry) keyPrecipitationType = "precipType" keyPrecipitationIntensity = "precipIntensity" type = False intens = False if keyPrecipitationType in entry: type = True if keyPrecipitationIntensity in entry: intens = True if type == True and intens == True: precipitationEntry = {'latitude':entry["latitude"],'longitude':entry["longitude"],'precipitationRate':entry["precipIntensity"],'precipitationType':entry["precipType"],'timestamp':entry["time"],"children": []} precipitationData.append(precipitationEntry) elif type == False and intens == True: #precipitationEntry = {'latitude':entry["latitude"],'longitude':entry["longitude"],'precipitationRate':entry["precipIntensity"],'timestamp':entry["time"]} precipitationEntry = {'latitude':entry["latitude"],'longitude':entry["longitude"],'precipitationRate':entry["precipIntensity"],'precipitationType':"NoData",'timestamp':entry["time"],"children": []} precipitationData.append(precipitationEntry) # adds a header to each DataArray timeResolution = 1 * 60 * 60 temperatureDict = {'meta': {'dataType': 'temperature', 'temporal': True, 'timeResolution': timeResolution}, 'root':{"children":temperatureData}} windDict = {'meta': {'dataType': 'wind', 'temporal': True, 'timeResolution': timeResolution}, 'root':{"children":windData}} cloudCoverDict = {'meta': {'dataType': 'cloudCover', 'temporal': True, 'timeResolution': timeResolution}, 'root':{"children":cloudCoverData}} precipitationDict = {'meta': {'dataType': 'precipitation', 'temporal': True, 'timeResolution': timeResolution}, 'root':{"children":precipitationData}} # writes the different kJson-files for the different weatherSources d.write_json_file(temperatureDict, 'test-data/temperature-data.kJson', True) d.write_json_file(windDict, 'test-data/wind-data.kJson', True) d.write_json_file(cloudCoverDict, 'test-data/cloud-coverage-data.kJson', True) d.write_json_file(precipitationDict, 'test-data/precipitation-data.kJson', True)
# NOTE(review): this line is a whitespace-collapsed fragment. It starts inside
# a `try:` block (only the matching `except KeyError` is visible) and, judging
# by the counters it advances, presumably inside loops over time and position
# whose headers are outside this view -- confirm against the full file.
#
# Visible success path:
#   * print `url`, fetch it with urllib2.urlopen (timeout 1.5) and parse the
#     response as JSON;
#   * take data["hourly"]["data"] and stamp every element with the current
#     longitude and latitude;
#   * copy currentlyDownloadedDataSets into globallyDownloadedSets, store it in
#     weather_config["meta"]["downloadedSets"], extend weather_config["data"]
#     with the hourly entries and write weather-data-collection.json;
#   * increment currentlyDownloadedDataSets and advance current_time by 86400.
# Visible KeyError path:
#   * print a message, advance current_time by 86400 and increment the counter;
#   * if the counter now exceeds globallyDownloadedSets, update it, store it in
#     the config and write the file again.
# Finally current_latitude is increased by 1 and current_longitude decreased
# by 1.
#
# NOTE(review): the success path stores the counter *before* incrementing it,
# the KeyError path *after* -- confirm this asymmetry is intended.
# NOTE(review): only KeyError is handled here; a timeout or network error from
# urlopen is not caught by the visible handler.
print url data = json.load(urllib2.urlopen(url, timeout=1.5)) hoursArray = data["hourly"]["data"] # add lat and lon to hourly data for hourdata in hoursArray: hourdata["longitude"] = current_longitude hourdata["latitude"] = current_latitude globallyDownloadedSets = currentlyDownloadedDataSets # store in JSON file weather_config["meta"]["downloadedSets"] = globallyDownloadedSets weather_config["data"] = weather_config["data"] + hoursArray d.write_json_file(weather_config, "weather-data-collection.json") currentlyDownloadedDataSets += 1 # next day current_time += 86400 except KeyError as e: # no weather data available for this request :( print "No weather data available for this request :(" current_time += 86400 currentlyDownloadedDataSets += 1 if currentlyDownloadedDataSets > globallyDownloadedSets: globallyDownloadedSets = currentlyDownloadedDataSets weather_config["meta"]["downloadedSets"] = globallyDownloadedSets d.write_json_file(weather_config, "weather-data-collection.json") current_latitude += 1 current_longitude -= 1
# Convert raw tweet records into the kJson tree layout and write them out.
#
# Relies on names defined earlier in the script: `twitterjsondata` (iterable of
# dicts carrying "lon", "lat" and a "timestamp" in YYYYmmddHHMMSS form),
# `randint`, `time`, `datetime` and the DataUtility instance `d`.
tweets = []
count = 0
for tweet in twitterjsondata:
    try:
        tweet["longitude"] = float(tweet["lon"])
        tweet["latitude"] = float(tweet["lat"])
        tweet["children"] = []
        # Synthetic retweet count; the source data does not provide one.
        tweet["numberOfRetweets"] = randint(0, 101)
        timestamp = tweet["timestamp"]
        # "YYYYmmddHHMMSS" -> integer epoch seconds. time.mktime interprets the
        # parsed struct_time as local time.
        parsed = datetime.datetime.strptime(str(timestamp), "%Y%m%d%H%M%S")
        tweet["timestamp"] = int(time.mktime(parsed.timetuple()))
    except (KeyError, ValueError, TypeError) as e:
        # A record with a missing key is skipped; treat a malformed coordinate
        # or timestamp the same way instead of aborting the whole conversion.
        print("error" + str(e))
        continue

    # Only the first 10000 successfully converted tweets are exported.
    if count < 10000:
        tweets.append(tweet)
    count = count + 1

newtwitterjsondata = {
    'meta': {'dataType': 'tweets', 'temporal': True, 'timeResolution': 1},
    'root': {'children': tweets},
}
d.write_json_file(newtwitterjsondata, "tweets.kJson", True)
# NOTE(review): this line is a whitespace-collapsed fragment. It starts inside
# a function body -- presumably `reformat_dictionary(d)`, since the visible
# code recurses through that name and operates on `d` -- whose `def` line (and
# any condition guarding the first statement) is outside this view.
#
# Visible function tail:
#   * set d['children'] to an empty list;
#   * walk d.iteritems():
#       - OrderedDict value: if its key is 'children', wrap it in a one-element
#         list; recurse into the value;
#       - list value: recurse into every element;
#       - anything else: cast 'longitude' / 'latitude' values to float, and for
#         a 'name' key bump count[0] and print progress on every 100th.
#     The exact nesting of the recursive call relative to the 'children' check
#     cannot be recovered from this collapsed form.
# Visible script part:
#   * create a DataUtility, read 'cities.xml', reformat it in place, keep its
#     'root' entry, wrap it as {'meta': ..., 'root': ...} and write
#     'cities.json'.
#
# NOTE(review): `d` names the dictionary inside the function and the
# DataUtility instance at script level.
# NOTE(review): `iteritems` and the print statements are Python 2 only.
d['children'] = [] for k, v in d.iteritems(): if isinstance(v, OrderedDict): if k == 'children': d[k] = [v] reformat_dictionary(v) elif isinstance(v, list): for e in v: reformat_dictionary(e) else: if k == 'longitude' or k == 'latitude': d[k] = float(v) if k == 'name': count[0] += 1 if count[0] % 100 == 0: print "Formatting city {0}".format(count[0]) d = DataUtility() print 'Reading the XML file...' xml_content = d.read_xml_file('cities.xml') reformat_dictionary(xml_content) xml_content = xml_content['root'] cities = {'meta': {'dataType': 'cities', 'temporal': False}, 'root': xml_content} print 'Writing the JSON file...' d.write_json_file(cities, 'cities.json', True)
# NOTE(review): this line is a whitespace-collapsed fragment. It starts inside
# a construct (presumably a loop that supplies `index` -- confirm) whose header
# is outside this view.
#
# Visible steps:
#   * take segmented_flights[index] as `new_tree`;
#   * for every item in `tree`, compute distance(...) between the item's
#     startPosition and each new_tree entry's startPosition (latitude,
#     longitude), and append the item to the 'children' list of the entry with
#     the smallest distance; the comparison is strict, so on a tie the earlier
#     entry is kept;
#   * rewrite a progress line on stdout every 20 items and on the last item;
#   * replace `tree` with `new_tree`;
#   * afterwards wrap `tree` as {'meta': ..., 'root': {'children': tree}} and
#     write it to '../test-data/flights.json'.
#
# NOTE(review): best_parent_index starts as {} and is only replaced when some
# distance is below maxint. If new_tree is empty (or no distance qualifies),
# new_tree[best_parent_index] indexes a list with a dict and raises TypeError.
new_tree = segmented_flights[index] current_index = 0 for item in tree: best_parent_index = {} current_distance = maxint for parent_index, parent in enumerate(new_tree): parent_distance = distance( item['startPosition']['latitude'], item['startPosition']['longitude'], parent['startPosition']['latitude'], parent['startPosition']['longitude'] ) if (parent_distance < current_distance): best_parent_index = parent_index current_distance = parent_distance new_tree[best_parent_index]['children'].append(item) if current_index % 20 == 0 or current_index + 1 == len(tree): stdout.write('\r' + str('Item {0}/{1}'.format(current_index, len(tree) - 1))) stdout.flush() current_index += 1 tree = new_tree print '\n' print 'Writing rearranged data to JSON file...' flights = {'meta': {'dataType': 'flights', 'temporal': False}, 'root': { 'children': tree}} d.write_json_file(flights, '../test-data/flights.json', True)
# Usage walkthrough for DataUtility: load the CSV, XML and JSON samples, print
# each one plus a single value picked out of it, then write the CSV rows back
# out as JSON.
from DataUtility import DataUtility

d = DataUtility()

# CSV sample: print everything, then the 'author' field of the first row.
csv_file = d.read_csv_file('example-data/test.csv')
print(csv_file)
print(csv_file[0]['author'])

# XML sample: print the parsed tree, then one nested entry's name.
xml_file = d.read_xml_file('example-data/test.xml')
print(xml_file)
print(xml_file['cities']['stadt']['stadt'][1]['name'])

# JSON sample: print the parsed tree, then one nested value.
json_file = d.read_json_file('example-data/test.json')
print(json_file)
print(json_file['globe']['radius'])

# Persist the CSV-derived data as a new JSON file.
d.write_json_file(csv_file, 'example-data/output.json')