Example no. 1
0
def work(modelDir, inputDict):
	'''Run the model in its directory.

	Pulls weather data for the requested source (METAR or USCRN), writes it
	to weather.csv inside modelDir, and returns a dict with the cleaned
	hourly data plus a count of missing values.
	'''
	outData = {}
	# Run VBAT code.
	source = inputDict["source"]
	year = inputDict["year"]
	station = inputDict["station"]
	parameter = inputDict["weatherParameter"]
	state_city = inputDict["state_city"]
	verifiedData = []
	errorCount = 0
	weatherPath = pJoin(modelDir, "weather.csv")
	# Check the source and use the appropriate pull function, writing raw data.
	if source == "METAR":
		data = pullMETAR(year, station, parameter)
		with open(weatherPath, "w") as outFile:
			outFile.write(data)
	elif source == "USCRN":
		data = pullUSCRN(year, state_city, parameter)
		# FIX: csv.writer needs a text-mode file with newline='' (not 'wb'),
		# otherwise it fails on Python 3 and double-spaces rows on Windows.
		with open(weatherPath, "w", newline="") as outFile:
			csv.writer(outFile).writerows([[x] for x in data])
	# Raw METAR should not be reformatted: it is already in its own format
	# and difficult to handle. Everything else is normalized to one value
	# per hour of the year.
	if parameter != "metar" and source == "METAR":
		verifiedData = [999.9] * 8760  # 999.9 is the missing-hour sentinel
		firstDT = dt.datetime(int(year), 1, 1, 0)
		with open(weatherPath, "r") as inFile:
			for row in csv.reader(inFile):
				# Skip the header row ("valid") and missing readings ("M").
				if row[1] != "valid" and row[2] != "M":
					deltatime = parseDt(row[1]) - firstDT
					hourIndex = int(math.floor(deltatime.total_seconds() / (60 * 60)))
					verifiedData[hourIndex] = row[2]
		# Store good data to allOutputData.json and weather.csv.
		outData["data"] = verifiedData
		with open(weatherPath, "w", newline="") as outFile:
			csv.writer(outFile).writerows([[x] for x in verifiedData])
	elif source == "USCRN":
		verifiedData = []
		with open(weatherPath, "r") as inFile:
			for row in csv.reader(inFile):
				# FIX: removed a stray Python-2 debug statement (`print row[0]`)
				# that was a syntax error under Python 3.
				verifiedData.append(row[0])
		with open(weatherPath, "w", newline="") as outFile:
			csv.writer(outFile).writerows([[x] for x in verifiedData])
	# Count how many hours still hold the missing-data sentinel.
	# NOTE(review): USCRN rows come back as strings, so the float compare
	# below can only match untouched METAR sentinel slots — confirm intent.
	for each in verifiedData:
		if each == 999.9:
			errorCount += 1
	outData["errorCount"] = errorCount
	outData["stdout"] = "Success"
	return outData
Example no. 2
0
def trafficLogStats(logsPath, outFilePath):
	'''Parse a web access log (plain text or zipped) and save a summary image.

	The image contains: session counts by month, hit counts by month, a
	hits-per-user histogram, hits by country, and a browser-type breakdown.
	'''
	# Read in a file containing the full access log.
	if logsPath.endswith('.zip'):
		# Support for reading zipped logs; use the first top-level member.
		# FIX: close the archive via context managers and decode the bytes
		# lines — zipfile yields bytes, which would crash the string
		# membership tests below on Python 3.
		with zipfile.ZipFile(logsPath, 'r') as zfile:
			fname = [x for x in zfile.namelist() if '/' not in x][0]
			with zfile.open(fname) as zcontent:
				lines = [l.decode('utf-8', 'replace') for l in zcontent.readlines()]
	else:
		# Support for plain text logs.
		with open(logsPath, 'r') as logfile:
			lines = logfile.readlines()
	# Create data structures for tracking metrics.
	recordCount = collections.Counter()   # unique sessions per month
	monthCount = collections.Counter()    # total hits per month
	browserCount = collections.Counter()  # hits per browser family
	IPCount = collections.Counter()       # hits per country
	userCount = collections.Counter()     # hits per IP address
	users = set()  # Create set of users to prevent duplications.
	# Process the log file to generate hit and session counts.
	for line in lines:
		# Now split and define things.
		words = line.split()
		try:
			ip = geolite2.lookup(words[0])
		except Exception:
			ip = None
		if ip is not None and ip.country is not None:
			if ip.country == 'XK':
				# Kosovo has no entry in the ISO country table; special-case it.
				IPCount["Kosovo"] += 1
			else:
				nation = countries.get(ip.country)
				IPCount[nation.name] += 1
		# Browser type. NOTE(review): Chrome user agents also contain
		# "Safari", so Chrome must be tested first — order matters here.
		if "Chrome" in line:
			browserCount["Chrome"] += 1
		elif "Firefox" in line:
			browserCount["FireFox"] += 1
		elif "Safari" in line:
			browserCount["Safari"] += 1
		elif "Explorer" in line:
			browserCount["Internet Explorer"] += 1
		else:
			browserCount["Other"] += 1
		# FIX: removed an unconditional `browserCount["Other"] += 1` that
		# inflated the "Other" bucket once for every line processed.
		# Get date of access; fall back to a fixed month on parse failure.
		try:
			dtStr = words[3][1:].replace(':', ' ', 1)
			dt = parseDt(dtStr)
			accessDt = str(dt.year) + '-' + str(dt.month).zfill(2)
		except Exception:
			accessDt = '2019-01'
		# Is this a unique viewer?
		ipStr = words[0]
		if ipStr not in users:
			# Add another user to the count.
			recordCount[accessDt] += 1
			users.add(ipStr)
		# No matter what, we update the monthly count.
		monthCount[accessDt] += 1
		userCount[ipStr] += 1
	# Set up plotting:
	plt.figure(figsize=(15, 15))
	ggColors = [x['color'] for x in plt.rcParams['axes.prop_cycle']]
	# Session counts by month:
	log = collections.OrderedDict(sorted(recordCount.items(), key=lambda x: x[0]))
	plt.subplot(3, 1, 1)
	ax = plt.gca()
	totalSessions = "{:,}".format(sum(log.values()))
	creationTime = datetime.now().strftime('%Y-%m-%d')
	ax.set_title('Session Count By Month. Total: ' + totalSessions + '\nGenerated: ' + creationTime)
	barRange = range(len(log))
	plt.bar(barRange, list(log.values()), align='center')
	plt.xticks(barRange, [x.replace('/', '\n') for x in log.keys()])
	plt.axis('tight')
	# Hit counts by month:
	log = collections.OrderedDict(sorted(monthCount.items(), key=lambda x: x[0]))
	plt.subplot(3, 1, 2)
	ax = plt.gca()
	ax.set_title('Hit Count By Month. Total: ' + "{:,}".format(sum(log.values())))
	barRange = range(len(log))
	plt.bar(barRange, list(log.values()), align='center')
	plt.xticks(barRange, [x.replace('/', '\n') for x in log.keys()])
	plt.axis('tight')
	# Plot the hits per user histogram:
	userElements = userCount.items()
	browserElements = browserCount.items()
	plt.subplot(3, 3, 7)
	userValues = list(pair[1] for pair in userElements)
	plt.title('Histogram of Hits Per User')
	plt.hist(userValues, bins=range(0, 50, 5))
	# Country hit counts:
	log = collections.OrderedDict(sorted(IPCount.items(), key=lambda x: x[1], reverse=True))
	countryTotal = str(len(log))
	# Just look at the top countries (first 11 entries, matching the
	# original `i > 10` cutoff). FIX: snapshot the keys first — deleting
	# from a dict while iterating it raises RuntimeError.
	for k in list(log)[11:]:
		del log[k]
	plt.subplot(3, 3, 8)
	ax = plt.gca()
	ax.set_title('Hits by Country. Total Countries: ' + countryTotal)
	people = [x[0:14] for x in log.keys()]
	y_pos = range(len(people))
	performance = [x for x in log.values()]
	ax.barh(y_pos, performance, align='center')
	ax.set_yticks(y_pos)
	ax.set_yticklabels(people, fontsize=8)
	ax.invert_yaxis()  # labels read top-to-bottom
	# Browser type breakdown ("Other" is deliberately excluded from the pie):
	b_label_list = list(x[0] for x in browserElements if x[0] != 'Other')
	browserValues = list(int(x[1]) for x in browserElements if x[0] != 'Other')
	plt.subplot(3, 3, 9)
	plt.pie(sorted(browserValues, reverse=True), colors=ggColors)
	browserLabels = [(l, s) for l, s in zip(b_label_list, browserValues)]
	plt.legend(labels=sorted(browserLabels, key=lambda x: x[1], reverse=True), shadow=True)
	plt.title('Browser Type Breakdown')
	# Adjust and write out the image.
	plt.subplots_adjust(left=0.1, right=0.9)
	plt.savefig(outFilePath)
Example no. 3
0
def trafficLogStats(logsPath, outFilePath):
    '''Parse a web access log (plain text or zipped) and save a summary image.

    The image contains: session counts by month, hit counts by month, a
    hits-per-user histogram, hits by country, and a browser-type breakdown.
    Also writes the geolocated lat/lons to scratch/ipLocDatabase.txt and
    renders an HTML map to static/ipLoc.html.
    '''
    # Read in a file containing the full access log.
    if logsPath.endswith('.zip'):
        # Support for reading zipped logs; use the first top-level member.
        # FIX: decode the bytes lines — zipfile yields bytes, which would
        # crash the string membership tests below on Python 3.
        with zipfile.ZipFile(logsPath, 'r') as zfile:
            fname = [x for x in zfile.namelist() if '/' not in x][0]
            with zfile.open(fname) as zcontent_file:
                lines = [l.decode('utf-8', 'replace') for l in zcontent_file.readlines()]
    else:
        # Support for plain text logs.
        with open(logsPath, 'r') as logfile:
            lines = logfile.readlines()
    # Create data structures for tracking metrics.
    recordCount = collections.Counter()   # unique sessions per month
    monthCount = collections.Counter()    # total hits per month
    browserCount = collections.Counter()  # hits per browser family
    IPCount = collections.Counter()       # hits per country
    userCount = collections.Counter()     # hits per IP address
    users = set()  # Create set of users to prevent duplications.
    locs = []  # geolocated lat/lon pairs for the HTML map
    # Process the log file to generate hit and session counts.
    for line in lines:
        # Now split and define things.
        words = line.split()
        try:
            ip = geolite2.lookup(words[0])
        except Exception:
            ip = None
        if ip is not None and ip.location is not None:
            locs.append(ip.location)
        if ip is not None and ip.country is not None:
            if ip.country == 'XK':
                # Kosovo has no entry in the ISO country table; special-case it.
                IPCount["Kosovo"] += 1
            else:
                nation = countries.get(ip.country)
                IPCount[nation.name] += 1
        # Browser type. NOTE(review): Chrome user agents also contain
        # "Safari", so Chrome must be tested first — order matters here.
        if "Chrome" in line:
            browserCount["Chrome"] += 1
        elif "Firefox" in line:
            browserCount["FireFox"] += 1
        elif "Safari" in line:
            browserCount["Safari"] += 1
        elif "Explorer" in line:
            browserCount["Internet Explorer"] += 1
        else:
            browserCount["Other"] += 1
        # FIX: removed an unconditional `browserCount["Other"] += 1` that
        # inflated the "Other" bucket once for every line processed.
        # Get date of access; fall back to a fixed month on parse failure.
        try:
            dtStr = words[3][1:].replace(':', ' ', 1)
            dt = parseDt(dtStr)
            accessDt = str(dt.year)[-2:] + '-' + str(dt.month).zfill(2)
        except Exception:
            accessDt = '19-01'
        # Is this a unique viewer?
        ipStr = words[0]
        if ipStr not in users:
            # Add another user to the count.
            recordCount[accessDt] += 1
            users.add(ipStr)
        # No matter what, we update the monthly count.
        monthCount[accessDt] += 1
        userCount[ipStr] += 1
    # Output any lat/lons we found.
    iplPath = os.path.join(os.path.dirname(__file__), 'scratch/ipLocDatabase.txt')
    with open(iplPath, 'w') as iplFile:
        for loc in locs:
            iplFile.write(str(loc) + '\n')
    # Read the IP locations back and clean up their formatting.
    with open(iplPath, 'r') as locFile:
        markers = list(set(locFile.readlines()))
        markers = [
            x.replace('\n', '').replace('(', '[').replace(')', ']')
            for x in markers
        ]
    # Render the HTML map of IP locations.
    with open(os.path.join(os.path.dirname(__file__), 'static/ipLoc.html'),
              'w') as f2:
        f2.write(template.render(markers=markers))
    # Set up plotting:
    plt.figure(figsize=(15, 15))
    ggColors = [x['color'] for x in plt.rcParams['axes.prop_cycle']]
    # Session counts by month:
    log = collections.OrderedDict(
        sorted(recordCount.items(), key=lambda x: x[0]))
    plt.subplot(3, 1, 1)
    ax = plt.gca()
    totalSessions = "{:,}".format(sum(log.values()))
    creationTime = datetime.now().strftime('%Y-%m-%d')
    ax.set_title('Session Count By Month. Total: ' + totalSessions +
                 '\nGenerated: ' + creationTime)
    barRange = list(range(len(log)))
    plt.bar(barRange, list(log.values()), align='center')
    plt.xticks(barRange, [x.replace('/', '\n') for x in log.keys()])
    plt.axis('tight')
    # Hit counts by month:
    log = collections.OrderedDict(
        sorted(monthCount.items(), key=lambda x: x[0]))
    plt.subplot(3, 1, 2)
    ax = plt.gca()
    ax.set_title('Hit Count By Month. Total: ' +
                 "{:,}".format(sum(log.values())))
    barRange = list(range(len(log)))
    plt.bar(barRange, list(log.values()), align='center')
    plt.xticks(barRange, [x.replace('/', '\n') for x in log.keys()])
    plt.axis('tight')
    # Plot the hits per user histogram:
    userElements = userCount.items()
    browserElements = browserCount.items()
    plt.subplot(3, 3, 7)
    userValues = list(pair[1] for pair in userElements)
    plt.title('Histogram of Hits Per User')
    plt.hist(userValues, bins=list(range(0, 50, 5)))
    # Country hit counts:
    log = collections.OrderedDict(
        sorted(IPCount.items(), key=lambda x: x[1], reverse=True))
    countryTotal = str(len(log))
    # Just look at the top countries (first 11 entries, matching the
    # original `i > 10` cutoff). FIX: snapshot the keys first — deleting
    # from a dict while iterating it raises RuntimeError.
    for k in list(log)[11:]:
        del log[k]
    plt.subplot(3, 3, 8)
    ax = plt.gca()
    ax.set_title('Hits by Country. Total Countries: ' + countryTotal)
    people = [x[0:14] for x in log.keys()]
    y_pos = list(range(len(people)))
    performance = [x for x in log.values()]
    ax.barh(y_pos, performance, align='center')
    ax.set_yticks(y_pos)
    ax.set_yticklabels(people, fontsize=8)
    ax.invert_yaxis()  # labels read top-to-bottom
    # Browser type breakdown ("Other" is deliberately excluded from the pie):
    b_label_list = list(x[0] for x in browserElements if x[0] != 'Other')
    browserValues = list(int(x[1]) for x in browserElements if x[0] != 'Other')
    plt.subplot(3, 3, 9)
    plt.pie(sorted(browserValues, reverse=True), colors=ggColors)
    browserLabels = [(l, s) for l, s in zip(b_label_list, browserValues)]
    plt.legend(labels=sorted(browserLabels, key=lambda x: x[1], reverse=True),
               shadow=True)
    plt.title('Browser Type Breakdown')
    # Adjust and write out the image.
    plt.subplots_adjust(left=0.1, right=0.9)
    plt.savefig(outFilePath)