def collectUserFeatures(data, user, featureFunction):
    """Gather featureFunction results across every site entry of one user.

    data: root directory; the user's entries are listed from data/user.
    user: name of the user's subdirectory under `data`.
    featureFunction: callable applied to each filtered keystroke stream;
        it must return an iterable.

    Returns a single itertools.chain over the per-site results, in the
    order os.listdir reports the entries.
    """
    userDir = data + '/' + user
    # All entries are processed eagerly; only the final chaining is lazy.
    perSite = [
        featureFunction(
            util.filterKeystrokes(util.openStream(userDir + '/' + entry))
        )
        for entry in os.listdir(userDir)
    ]
    return chain(*perSite)
def extractFeatureVectors(data, statisticFn):
    """Yield one feature vector per segmented session for every user.

    data: root directory containing one subdirectory per user, each of
        which holds that user's per-site recordings.
    statisticFn: callable applied to the features collected for each
        index in FEATURE_INDICES; its (iterable) result is appended to
        the vector.

    Each yielded list starts with the user's index, followed by the
    concatenated statisticFn outputs.

    NOTE(review): the original indentation was lost; the leading counter
    is assumed to advance once per user (i.e. it is a per-user label) --
    confirm against the upstream file.
    """
    for label, user in enumerate(os.listdir(data)):
        userDir = data + '/' + user
        for entry in os.listdir(userDir):
            keystrokes = util.filterKeystrokes(
                util.openStream(userDir + '/' + entry)
            )
            # 600 is passed straight to segmentStream; presumably a
            # segmentation threshold -- TODO confirm its unit in util.
            for session in util.segmentStream(keystrokes, 600):
                row = [label]
                for index in FEATURE_INDICES:
                    collected = viz.collectFeatures(session, viz.FEATURES[index])
                    # No minimum-size filter is applied: every session
                    # produces a vector.
                    row.extend(statisticFn(collected))
                yield row
def _saveHistogram(values, bins, path):
    """Clear the current figure, plot `values` in `bins` bins, save to `path`."""
    plt.clf()
    plt.hist(values, bins)
    plt.savefig(path)


def main(args):
    """Write per-user, per-site keystroke histograms as PNG files.

    args.dir: input root, laid out as <dir>/<user>/<site>.
    args.odir: output root; images are written to
        <odir>/<user>/<tag>-<site>.png.

    For each site recording the following histograms are produced when
    the underlying data is non-empty: all keystroke lengths, keystroke
    lengths per entry of `keyCombos` (named by `names`), word duration,
    word length, key overlaps, word pauses and shift-modifier delays.
    """
    createDirectoryIfNotExist(args.odir)
    for user in os.listdir(args.dir):
        createDirectoryIfNotExist(args.odir + '/' + user)
        for site in os.listdir(args.dir + '/' + user):
            relName = user + '/' + site
            createDirectoryIfNotExist(args.odir + '/' + relName)
            stream = util.filterKeystrokes(util.openStream(args.dir + '/' + relName))
            sessions = util.segmentStream(stream)

            def outPath(tag, user=user, site=site):
                # Images live directly under the user's directory.
                return args.odir + '/' + user + '/' + tag + '-' + site + '.png'

            # Histograms of keystroke usage per user
            allLengths = list(util.getAllKeystrokeLengths(stream))
            if allLengths:
                _saveHistogram(allLengths, 200, outPath('all-kl'))

            for i, combo in enumerate(keyCombos):
                lengths = list(util.getKeystrokeLengths(stream, combo))
                if lengths:
                    _saveHistogram(lengths, 200, outPath(names[i] + '-kl'))

            # Histograms of word data, per user. zip() is materialised
            # because it returns a one-shot iterator on Python 3, which
            # supports neither len() nor indexing.
            wordData = list(zip(*chain(*[util.getWordData(s) for s in sessions])))
            if len(wordData) > 0 and len(wordData[0]) > 0:
                durations = [d for d in wordData[0] if abs(d) < 10]
                _saveHistogram(durations, 200, outPath('word-dur'))
                _saveHistogram(list(wordData[1]), 200, outPath('word-len'))

            # Key overlaps, computed over the whole stream (not per session)
            keyOverlaps = [x for x in util.getKeyOverlaps(stream) if abs(x) < 5]
            if keyOverlaps:
                _saveHistogram(keyOverlaps, 100, outPath('overlap'))

            # Word pauses
            wordPauses = [x for x in util.getWordPauses(stream) if abs(x) < 60]
            if wordPauses:
                _saveHistogram(wordPauses, 100, outPath('word-pause'))

            # Time between shift key and modified key
            shiftTime = [
                x for x in util.getModifierDelays(stream, util._SHIFT)
                if abs(x) < 5
            ]
            if shiftTime:
                _saveHistogram(shiftTime, 100, outPath('shift-delay'))

            # Time between shift-to-shift
            # NOTE(review): this reads the same getModifierDelays data as
            # 'shift-delay' above, only with a wider cutoff (1200); it
            # looks like a copy-paste slip -- confirm which util function
            # was intended.
            shiftShift = [
                x for x in util.getModifierDelays(stream, util._SHIFT)
                if abs(x) < 1200
            ]
            if shiftShift:
                _saveHistogram(shiftShift, 100, outPath('shift-shift'))