def work(modelDir, inputDict):
	''' Run the model in its directory.

	Pulls weather data for one year from the requested source (METAR or
	USCRN), writes the raw pull to weather.csv inside modelDir, then re-reads
	and cleans it into a numeric series (8760 hourly slots for METAR, with
	999.9 marking a missing hour).

	Returns an outData dict with keys:
		"data"       -- the cleaned series (list of floats)
		"errorCount" -- number of 999.9 sentinel (missing) values
		"stdout"     -- "Success"
	'''
	outData = {}
	source = inputDict["source"]
	year = inputDict["year"]
	station = inputDict["station"]
	parameter = inputDict["weatherParameter"]
	state_city = inputDict["state_city"]
	verifiedData = []
	errorCount = 0
	# Pull from the appropriate source and write the raw result to weather.csv.
	if source == "METAR":
		data = pullMETAR(year, station, parameter)
		with open(pJoin(modelDir, "weather.csv"), "w") as f:
			f.write(data)
	elif source == "USCRN":
		data = pullUSCRN(year, state_city, parameter)
		# Text mode with newline='' is the csv-module convention on Python 3
		# (the original opened "wb", which TypeErrors under py3 csv.writer).
		with open(pJoin(modelDir, "weather.csv"), "w", newline='') as f:
			csv.writer(f).writerows([[x] for x in data])
	if parameter != "metar" and source == "METAR":
		# Raw "metar" text is left as-is (it has its own format); otherwise
		# bucket each observation into one slot per hour of the year.
		verifiedData = [999.9] * 8760
		firstDT = dt.datetime(int(year), 1, 1, 0)
		with open(pJoin(modelDir, "weather.csv"), "r") as f:
			for row in csv.reader(f):
				# Skip the header row ("valid") and missing readings ("M").
				if row[1] != "valid" and row[2] != "M":
					elapsed = parseDt(row[1]) - firstDT
					hour = int(elapsed.total_seconds() // 3600)
					# Store numerically so the sentinel count below works.
					verifiedData[hour] = float(row[2])
		with open(pJoin(modelDir, "weather.csv"), "w", newline='') as f:
			csv.writer(f).writerows([[x] for x in verifiedData])
	elif source == "USCRN":
		with open(pJoin(modelDir, "weather.csv"), "r") as f:
			# Convert back to float on re-read; the original kept these as
			# strings, which made the 999.9 comparison below always False.
			# assumes pullUSCRN yields numeric values -- TODO confirm.
			verifiedData = [float(row[0]) for row in csv.reader(f) if row]
		with open(pJoin(modelDir, "weather.csv"), "w", newline='') as f:
			csv.writer(f).writerows([[x] for x in verifiedData])
	# Count how many sentinel (missing) values remain.
	for each in verifiedData:
		if each == 999.9:
			errorCount += 1
	# Expose the cleaned series for both sources (the original only set
	# "data" on the METAR path).
	outData["data"] = verifiedData
	outData["errorCount"] = errorCount
	outData["stdout"] = "Success"
	return outData
def trafficLogStats(logsPath, outFilePath):
	'''Parse a web-server access log and write a summary chart image.

	logsPath    -- path to a plain-text access log, or a .zip containing one
	               (the first top-level member of the archive is used).
	outFilePath -- path the rendered matplotlib figure is saved to.

	The figure contains: sessions per month, hits per month, a histogram of
	hits per user, hits by country (top 10), and a browser-type pie chart.
	'''
	# Read in the full access log; close files deterministically.
	# (The original leaked the ZipFile handle.)
	if logsPath.endswith('.zip'):
		with zipfile.ZipFile(logsPath, 'r') as zfile:
			fname = [x for x in zfile.namelist() if '/' not in x][0]
			with zfile.open(fname) as zcontent:
				lines = zcontent.readlines()
	else:
		with open(logsPath, 'r') as logfile:
			lines = logfile.readlines()
	# Metrics accumulated over the log.
	recordCount = collections.Counter()   # unique-user sessions per month
	monthCount = collections.Counter()    # total hits per month
	browserCount = collections.Counter()  # hits per browser family
	IPCount = collections.Counter()       # hits per country
	userCount = collections.Counter()     # hits per IP address
	users = set()  # IPs already seen, to avoid double-counting sessions.
	for line in lines:
		words = line.split()
		# Geolocate the client IP; treat lookup failures as unknown.
		try:
			ip = geolite2.lookup(words[0])
		except Exception:
			ip = None
		if ip is not None and ip.country is not None:
			if ip.country == 'XK':
				# Kosovo's user-assigned code is absent from the countries db.
				IPCount["Kosovo"] += 1
			else:
				IPCount[countries.get(ip.country).name] += 1
		# Browser family. Order matters: Chrome UAs also contain "Safari".
		if "Chrome" in line:
			browserCount["Chrome"] += 1
		elif "Firefox" in line:
			browserCount["FireFox"] += 1
		elif "Safari" in line:
			browserCount["Safari"] += 1
		elif "Explorer" in line:
			browserCount["Internet Explorer"] += 1
		else:
			# NOTE(review): the original bumped "Other" twice here; "Other"
			# is excluded from the pie chart, so a single count suffices.
			browserCount["Other"] += 1
		# Month of access, e.g. '2019-01'; fall back on unparseable dates.
		try:
			dtStr = words[3][1:].replace(':', ' ', 1)
			dt = parseDt(dtStr)
			accessDt = str(dt.year) + '-' + str(dt.month).zfill(2)
		except Exception:
			accessDt = '2019-01'
		# A first-seen IP counts as one new session for that month.
		ipStr = words[0]
		if ipStr not in users:
			recordCount[accessDt] += 1
			users.add(ipStr)
		# Every line counts toward the monthly and per-user hit totals.
		monthCount[accessDt] += 1
		userCount[ipStr] += 1
	# Set up plotting:
	plt.figure(figsize=(15, 15))
	ggColors = [x['color'] for x in plt.rcParams['axes.prop_cycle']]
	# Session counts by month:
	log = collections.OrderedDict(sorted(recordCount.items(), key=lambda x: x[0]))
	plt.subplot(3, 1, 1)
	ax = plt.gca()
	totalSessions = "{:,}".format(sum(log.values()))
	creationTime = datetime.now().strftime('%Y-%m-%d')
	ax.set_title('Session Count By Month. Total: ' + totalSessions + '\nGenerated: ' + creationTime)
	barRange = range(len(log))
	plt.bar(barRange, list(log.values()), align='center')
	plt.xticks(barRange, [x.replace('/', '\n') for x in log.keys()])
	plt.axis('tight')
	# Hit counts by month:
	log = collections.OrderedDict(sorted(monthCount.items(), key=lambda x: x[0]))
	plt.subplot(3, 1, 2)
	ax = plt.gca()
	ax.set_title('Hit Count By Month. Total: ' + "{:,}".format(sum(log.values())))
	barRange = range(len(log))
	plt.bar(barRange, list(log.values()), align='center')
	plt.xticks(barRange, [x.replace('/', '\n') for x in log.keys()])
	plt.axis('tight')
	# Hits-per-user histogram:
	userElements = list(userCount.items())
	browserElements = list(browserCount.items())
	plt.subplot(3, 3, 7)
	userValues = [pair[1] for pair in userElements]
	plt.title('Histogram of Hits Per User')
	plt.hist(userValues, bins=range(0, 50, 5))
	# Country hit counts, most-hit first:
	log = collections.OrderedDict(sorted(IPCount.items(), key=lambda x: x[1], reverse=True))
	countryTotal = str(len(log))
	# Keep only the top 10 countries. Snapshot the keys first: deleting while
	# iterating a dict raises RuntimeError on Python 3 (and the original
	# 'i > 10' test actually kept 11 entries).
	for k in list(log)[10:]:
		del log[k]
	plt.subplot(3, 3, 8)
	ax = plt.gca()
	ax.set_title('Hits by Country. Total Countries: ' + countryTotal)
	people = [x[0:14] for x in log.keys()]
	y_pos = range(len(people))
	performance = list(log.values())
	ax.barh(y_pos, performance, align='center')
	ax.set_yticks(y_pos)
	ax.set_yticklabels(people, fontsize=8)
	ax.invert_yaxis()  # labels read top-to-bottom
	# Browser type breakdown, excluding the catch-all 'Other' bucket:
	b_label_list = [x[0] for x in browserElements if x[0] != 'Other']
	browserValues = [int(x[1]) for x in browserElements if x[0] != 'Other']
	plt.subplot(3, 3, 9)
	plt.pie(sorted(browserValues, reverse=True), colors=ggColors)
	browserLabels = list(zip(b_label_list, browserValues))
	plt.legend(labels=sorted(browserLabels, key=lambda x: x[1], reverse=True), shadow=True)
	plt.title('Browser Type Breakdown')
	# Adjust and write out the image.
	plt.subplots_adjust(left=0.1, right=0.9)
	plt.savefig(outFilePath)
def trafficLogStats(logsPath, outFilePath):
	'''Parse a web-server access log, write a summary chart image, and
	render an HTML map of client IP locations.

	logsPath    -- path to a plain-text access log, or a .zip containing one
	               (the first top-level member of the archive is used).
	outFilePath -- path the rendered matplotlib figure is saved to.

	Side effects: writes scratch/ipLocDatabase.txt (raw lat/lon pairs) and
	static/ipLoc.html (rendered map), both relative to this module's
	directory. The figure contains sessions and hits per month, a
	hits-per-user histogram, hits by country (top 10), and a browser pie.
	'''
	# Read in the full access log; close files deterministically.
	if logsPath.endswith('.zip'):
		with zipfile.ZipFile(logsPath, 'r') as zfile:
			fname = [x for x in zfile.namelist() if '/' not in x][0]
			with zfile.open(fname) as zcontent_file:
				lines = zcontent_file.readlines()
	else:
		with open(logsPath, 'r') as logfile:
			lines = logfile.readlines()
	# Metrics accumulated over the log.
	recordCount = collections.Counter()   # unique-user sessions per month
	monthCount = collections.Counter()    # total hits per month
	browserCount = collections.Counter()  # hits per browser family
	IPCount = collections.Counter()       # hits per country
	userCount = collections.Counter()     # hits per IP address
	users = set()  # IPs already seen, to avoid double-counting sessions.
	locs = []      # (lat, lon) tuples for the HTML map.
	for line in lines:
		words = line.split()
		# Geolocate the client IP; treat lookup failures as unknown.
		try:
			ip = geolite2.lookup(words[0])
		except Exception:
			ip = None
		if ip is not None and ip.location is not None:
			locs.append(ip.location)
		if ip is not None and ip.country is not None:
			if ip.country == 'XK':
				# Kosovo's user-assigned code is absent from the countries db.
				IPCount["Kosovo"] += 1
			else:
				IPCount[countries.get(ip.country).name] += 1
		# Browser family. Order matters: Chrome UAs also contain "Safari".
		if "Chrome" in line:
			browserCount["Chrome"] += 1
		elif "Firefox" in line:
			browserCount["FireFox"] += 1
		elif "Safari" in line:
			browserCount["Safari"] += 1
		elif "Explorer" in line:
			browserCount["Internet Explorer"] += 1
		else:
			# NOTE(review): the original bumped "Other" twice here; "Other"
			# is excluded from the pie chart, so a single count suffices.
			browserCount["Other"] += 1
		# Month of access as 'YY-MM'; fall back on unparseable dates.
		try:
			dtStr = words[3][1:].replace(':', ' ', 1)
			dt = parseDt(dtStr)
			accessDt = str(dt.year)[-2:] + '-' + str(dt.month).zfill(2)
		except Exception:
			accessDt = '19-01'
		# A first-seen IP counts as one new session for that month.
		ipStr = words[0]
		if ipStr not in users:
			recordCount[accessDt] += 1
			users.add(ipStr)
		# Every line counts toward the monthly and per-user hit totals.
		monthCount[accessDt] += 1
		userCount[ipStr] += 1
	# Persist any lat/lons we found.
	with open(os.path.join(os.path.dirname(__file__), 'scratch/ipLocDatabase.txt'), 'w') as iplFile:
		for L in locs:
			iplFile.write(str(L) + '\n')
	# Read the IP locations back, dedupe, and convert tuple syntax to the
	# JS-array-style brackets the map template expects.
	with open(os.path.join(os.path.dirname(__file__), 'scratch/ipLocDatabase.txt'), 'r') as locFile:
		markers = locFile.readlines()
	markers = list(set(markers))
	markers = [x.replace('\n', '').replace('(', '[').replace(')', ']') for x in markers]
	# Render the HTML map of IP locations.
	with open(os.path.join(os.path.dirname(__file__), 'static/ipLoc.html'), 'w') as f2:
		f2.write(template.render(markers=markers))
	# Set up plotting:
	plt.figure(figsize=(15, 15))
	ggColors = [x['color'] for x in plt.rcParams['axes.prop_cycle']]
	# Session counts by month:
	log = collections.OrderedDict(sorted(recordCount.items(), key=lambda x: x[0]))
	plt.subplot(3, 1, 1)
	ax = plt.gca()
	totalSessions = "{:,}".format(sum(log.values()))
	creationTime = datetime.now().strftime('%Y-%m-%d')
	ax.set_title('Session Count By Month. Total: ' + totalSessions + '\nGenerated: ' + creationTime)
	barRange = list(range(len(log)))
	plt.bar(barRange, list(log.values()), align='center')
	plt.xticks(barRange, [x.replace('/', '\n') for x in log.keys()])
	plt.axis('tight')
	# Hit counts by month:
	log = collections.OrderedDict(sorted(monthCount.items(), key=lambda x: x[0]))
	plt.subplot(3, 1, 2)
	ax = plt.gca()
	ax.set_title('Hit Count By Month. Total: ' + "{:,}".format(sum(log.values())))
	barRange = list(range(len(log)))
	plt.bar(barRange, list(log.values()), align='center')
	plt.xticks(barRange, [x.replace('/', '\n') for x in log.keys()])
	plt.axis('tight')
	# Hits-per-user histogram:
	userElements = list(userCount.items())
	browserElements = list(browserCount.items())
	plt.subplot(3, 3, 7)
	userValues = [pair[1] for pair in userElements]
	plt.title('Histogram of Hits Per User')
	plt.hist(userValues, bins=list(range(0, 50, 5)))
	# Country hit counts, most-hit first:
	log = collections.OrderedDict(sorted(IPCount.items(), key=lambda x: x[1], reverse=True))
	countryTotal = str(len(log))
	# Keep only the top 10 countries. Snapshot the keys first: deleting while
	# iterating a dict raises RuntimeError on Python 3 (and the original
	# 'i > 10' test actually kept 11 entries).
	for k in list(log)[10:]:
		del log[k]
	plt.subplot(3, 3, 8)
	ax = plt.gca()
	ax.set_title('Hits by Country. Total Countries: ' + countryTotal)
	people = [x[0:14] for x in log.keys()]
	y_pos = list(range(len(people)))
	performance = list(log.values())
	ax.barh(y_pos, performance, align='center')
	ax.set_yticks(y_pos)
	ax.set_yticklabels(people, fontsize=8)
	ax.invert_yaxis()  # labels read top-to-bottom
	# Browser type breakdown, excluding the catch-all 'Other' bucket:
	b_label_list = [x[0] for x in browserElements if x[0] != 'Other']
	browserValues = [int(x[1]) for x in browserElements if x[0] != 'Other']
	plt.subplot(3, 3, 9)
	plt.pie(sorted(browserValues, reverse=True), colors=ggColors)
	browserLabels = list(zip(b_label_list, browserValues))
	plt.legend(labels=sorted(browserLabels, key=lambda x: x[1], reverse=True), shadow=True)
	plt.title('Browser Type Breakdown')
	# Adjust and write out the image.
	plt.subplots_adjust(left=0.1, right=0.9)
	plt.savefig(outFilePath)