def __init__(self, filename):
    """Load one trace file and compute its feature list.

    Input: path and name of the file of a trace. The base name up to the
    first "." must parse as an integer; it is stored as the trace id.

    The first line of the file is skipped as a header. For every following
    line the first two comma-separated fields are parsed as floats and
    collected as the x and y coordinate series.

    Sets:
        self.triptime: number of coordinate pairs read.
        self.featurelist: list of 23 values, in this order:
            triplength, triptime, xvar, yvar, overlaps,
            distancecovered, maxspeed, medianspeed, meanspeed, varspeed,
            maxanglespeed, mediananglespeed, meananglespeed, varanglespeed,
            totalangle, medianangle, maxangle, minangle, jumps,
            accratio, acc, dec, stills

    Raises:
        ValueError: if the file name is not an integer or a coordinate
            field is not a float.
        IndexError: if the file has no data rows, or a row has fewer than
            two comma-separated fields.
    """
    self.__id = int(os.path.basename(filename).split(".")[0])

    x = []
    y = []
    with open(filename, "r") as trainfile:
        trainfile.readline()  # skip header
        for line in trainfile:
            # Only the first two fields are used; anything after the second
            # comma stays unsplit in items[2] and is ignored.
            items = line.split(",", 2)
            x.append(float(items[0]))
            y.append(float(items[1]))

    # Whole-trip features. distance() is called on the first and last point.
    triplength = distance(x[0], y[0], x[-1], y[-1])
    self.triptime = len(x)
    xvar, yvar = bettervariance(x, y)
    overlaps = overlap(x, y)
    self.featurelist = [triplength, self.triptime, xvar, yvar, overlaps]

    # Speed features, computed on the outlier-filtered velocities.
    v, distancecovered = velocities_and_distance_covered(x, y)
    vfiltered = reject_outliers(v, 3)
    maxspeed = max(vfiltered)
    medianspeed = median(vfiltered)
    meanspeed, varspeed = meanstdv(vfiltered)

    # NOTE(review): angles_and_jumps() receives no coordinates here, so it
    # must obtain them from instance state set elsewhere - confirm.
    angles, jumps = self.angles_and_jumps()

    # Speed-weighted angles use the unfiltered velocities; zip() truncates
    # to the shorter of the two series.
    anglespeed = [speed * angle for speed, angle in zip(v, angles)]
    anglespeedfiltered = reject_outliers(anglespeed, 3)
    maxanglespeed = max(anglespeedfiltered)
    mediananglespeed = median(anglespeedfiltered)
    meananglespeed, varanglespeed = meanstdv(anglespeedfiltered)

    totalangle = sum(angles)
    maxangle = max(angles)
    minangle = min(angles)
    # Fixed: this used to be max(angles), which made the "medianangle"
    # feature an exact duplicate of "maxangle".
    medianangle = median(angles)

    acc, dec, stills = getratio(vfiltered, medianspeed)
    changes = acc + dec
    # float() guards against Python 2 integer division, which would floor
    # the ratio to 0 or 1 if getratio() returns integer counts.
    # 0.5 is the fallback ratio when both acc and dec are zero.
    accratio = float(acc) / changes if changes else 0.5

    self.featurelist.extend([
        distancecovered,
        maxspeed,
        medianspeed,
        meanspeed,
        varspeed,
        maxanglespeed,
        mediananglespeed,
        meananglespeed,
        varanglespeed,
        totalangle,
        medianangle,
        maxangle,
        minangle,
        jumps,
        accratio,
        acc,
        dec,
        stills,
    ])
def analysis(trainfoldername, testfoldername, outdir, referencenum):
    """
    Start the analysis

    Input:
      1) trainfoldername: path to the directory whose sub-directories are
         the training driver folders
      2) testfoldername: path to the directory whose sub-directories are
         the test driver folders
      3) outdir: existing directory where the results file
         "testing_results_<timestamp>.txt" is written
      4) referencenum: number of training drivers to sample as reference
         drivers to compare against

    The results file holds one "%.4f" line per training folder, followed by
    "Mean", "Median" and "Min" summary lines. The summary lines are omitted
    when there are no results.

    Raises ValueError (from random.sample) if referencenum is larger than
    the number of training folders.

    NOTE(review): a second definition of `analysis` follows this one in the
    file; the later definition is the one that stays bound to the name.
    """
    seed(42)
    start = datetime.now()
    submission_id = datetime.now().strftime("%H_%M_%B_%d_%Y")

    # os.listdir() returns entries in arbitrary order. Sorting makes the
    # seeded sample below, and the order of the result lines, reproducible
    # across machines and file systems.
    trainfolders = [
        os.path.join(trainfoldername, f)
        for f in sorted(os.listdir(trainfoldername))
        if os.path.isdir(os.path.join(trainfoldername, f))
    ]

    # range() instead of xrange(): same sample, valid on Python 2 and 3.
    chosen = sorted(sample(range(len(trainfolders)), referencenum))
    referencedrivers = [Driver(trainfolders[i]) for i in chosen]
    generatedata(referencedrivers)

    testfolders = [
        os.path.join(testfoldername, f)
        for f in sorted(os.listdir(testfoldername))
        if os.path.isdir(os.path.join(testfoldername, f))
    ]
    testdrivers = [Driver(testfolder) for testfolder in testfolders]
    generatetestdata(testdrivers)

    results = Parallel(n_jobs=10)(
        delayed(perform_analysis)(trainfolder) for trainfolder in trainfolders)

    outpath = os.path.join(
        outdir, "testing_results_{0}.txt".format(submission_id))
    with open(outpath, 'w') as writefile:
        for item in results:
            writefile.write("%.4f\n" % item)
        # Without results the mean would divide by zero and min() would
        # raise, so the summary is skipped.
        if results:
            # float() avoids Python 2 floor division should the results
            # ever be integers.
            mean = sum(results) / float(len(results))
            writefile.write("Mean: %.4f\n" % mean)
            writefile.write("Median: %.4f\n" % median(results))
            writefile.write("Min: %.4f\n" % min(results))

    # Parenthesised single-argument print works on Python 2 and 3.
    print('Done, elapsed time: %s' % str(datetime.now() - start))
def analysis(trainfoldername, testfoldername, outdir, referencenum):
    """
    Start the analysis

    Input:
      1) trainfoldername: path to the directory whose sub-directories are
         the training driver folders
      2) testfoldername: path to the directory whose sub-directories are
         the test driver folders
      3) outdir: existing directory where the results file
         "testing_results_<timestamp>.txt" is written
      4) referencenum: number of training drivers to sample as reference
         drivers to compare against

    The results file holds one "%.4f" line per training folder, followed by
    "Mean", "Median" and "Min" summary lines. The summary lines are omitted
    when there are no results.

    Raises ValueError (from random.sample) if referencenum is larger than
    the number of training folders.

    NOTE(review): an earlier definition of `analysis` precedes this one in
    the file; this later definition is the one that stays bound to the
    name, so the earlier one looks removable.
    """
    seed(42)
    start = datetime.now()
    submission_id = datetime.now().strftime("%H_%M_%B_%d_%Y")

    # os.listdir() returns entries in arbitrary order. Sorting makes the
    # seeded sample below, and the order of the result lines, reproducible
    # across machines and file systems.
    trainfolders = [
        os.path.join(trainfoldername, f)
        for f in sorted(os.listdir(trainfoldername))
        if os.path.isdir(os.path.join(trainfoldername, f))
    ]

    # range() instead of xrange(): same sample, valid on Python 2 and 3.
    referencefolders = [
        trainfolders[i]
        for i in sorted(sample(range(len(trainfolders)), referencenum))
    ]
    referencedrivers = [Driver(folder) for folder in referencefolders]
    generatedata(referencedrivers)

    testfolders = [
        os.path.join(testfoldername, f)
        for f in sorted(os.listdir(testfoldername))
        if os.path.isdir(os.path.join(testfoldername, f))
    ]
    testdrivers = [Driver(folder) for folder in testfolders]
    generatetestdata(testdrivers)

    results = Parallel(n_jobs=10)(
        delayed(perform_analysis)(trainfolder) for trainfolder in trainfolders)

    with open(
            os.path.join(outdir,
                         "testing_results_{0}.txt".format(submission_id)),
            'w') as writefile:
        for item in results:
            writefile.write("%.4f\n" % item)
        # Without results the mean would divide by zero and min() would
        # raise, so the summary is skipped.
        if results:
            # float() avoids Python 2 floor division should the results
            # ever be integers.
            mean = sum(results) / float(len(results))
            writefile.write("Mean: %.4f\n" % mean)
            writefile.write("Median: %.4f\n" % median(results))
            writefile.write("Min: %.4f\n" % min(results))

    # Parenthesised single-argument print works on Python 2 and 3.
    print('Done, elapsed time: %s' % str(datetime.now() - start))