Exemple #1
0
def collectUserFeatures(data, user, featureFunction):
    """Apply featureFunction to every recording stored for one user.

    Each entry of the directory ``data/user`` is opened with
    ``util.openStream``, passed through ``util.filterKeystrokes`` and then
    handed to ``featureFunction``.

    Args:
        data: Path of the directory that holds one sub-directory per user.
        user: Name of the user's sub-directory inside ``data``.
        featureFunction: Callable that receives a filtered stream and
            returns an iterable of feature values.

    Returns:
        A ``chain`` iterator that walks the values produced for every
        entry, one entry after another, in ``os.listdir`` order.
    """
    userDir = '/'.join((data, user))
    perEntry = [
        featureFunction(
            util.filterKeystrokes(
                util.openStream('/'.join((userDir, entry)))))
        for entry in os.listdir(userDir)
    ]
    # All per-entry results are computed above; only their iteration is lazy.
    return chain.from_iterable(perEntry)
Exemple #2
0
def collectUserFeatures(data, user, featureFunction):
  """Gather the features of all recordings found under ``data/user``.

  Args:
    data: Path of the directory containing one sub-directory per user.
    user: Name of the sub-directory to scan.
    featureFunction: Callable invoked once per recording with the stream
      returned by ``util.filterKeystrokes``; it must return an iterable.

  Returns:
    A ``chain`` iterator over the concatenated results of every
    ``featureFunction`` call, in ``os.listdir`` order.
  """
  directory = data + '/' + user
  # Lazily open and filter each recording; the list comprehension below pulls
  # them one at a time, so each stream is opened right before it is used.
  filteredStreams = (
    util.filterKeystrokes(util.openStream(directory + '/' + entry))
    for entry in os.listdir(directory)
  )
  return chain(*[featureFunction(filtered) for filtered in filteredStreams])
def extractFeatureVectors(data, statisticFn):
    """Yield one labelled feature vector per session found under ``data``.

    ``data`` is scanned as ``data/<user>/<site>``.  Every site entry is
    opened with ``util.openStream``, filtered with
    ``util.filterKeystrokes`` and split by ``util.segmentStream(stream, 600)``
    (the meaning/units of 600 are defined by that helper and not visible
    here).

    Args:
        data: Path of the directory holding one sub-directory per user.
        statisticFn: Callable that receives the features collected for one
            feature index of one session and returns an iterable of values
            to append to the vector.

    Yields:
        A list whose first element is the zero-based position of the user
        in ``os.listdir(data)`` and whose remaining elements are the
        ``statisticFn`` outputs for each index in ``FEATURE_INDICES``, in
        order.
    """
    # enumerate supplies the per-user label that used to be a hand-kept counter.
    for label, user in enumerate(os.listdir(data)):
        userdir = data + '/' + user
        for site in os.listdir(userdir):
            stream = util.filterKeystrokes(util.openStream(userdir + '/' + site))
            for session in util.segmentStream(stream, 600):
                vector = [label]
                for ind in FEATURE_INDICES:
                    features = viz.collectFeatures(session, viz.FEATURES[ind])
                    # NOTE: an earlier filter that skipped sessions having
                    # fewer than two samples for any feature was disabled, so
                    # statisticFn receives short or empty inputs as well.
                    vector.extend(statisticFn(features))
                yield vector
Exemple #4
0
def extractFeatureVectors(data, statisticFn):
  """Generate a labelled feature vector for every session under ``data``.

  The directory layout read here is ``data/<user>/<site>``.  Each site entry
  is opened via ``util.openStream``, passed through ``util.filterKeystrokes``
  and segmented with ``util.segmentStream(stream, 600)`` (what 600 means is
  decided by that helper and cannot be confirmed from this function).

  Args:
    data: Path of the directory containing one sub-directory per user.
    statisticFn: Callable applied to the features collected for a single
      feature index of a single session; its result is appended to the
      vector with ``list.extend``, so it must return an iterable.

  Yields:
    A list starting with the user's zero-based index in ``os.listdir(data)``
    followed by the ``statisticFn`` results for every index in
    ``FEATURE_INDICES``, in that order.
  """
  # The user label is simply the position in the listing, so let enumerate
  # track it instead of incrementing a counter by hand.
  for userLabel, user in enumerate(os.listdir(data)):
    userdir = data + '/' + user
    for site in os.listdir(userdir):
      path = userdir + '/' + site
      stream = util.filterKeystrokes(util.openStream(path))
      for session in util.segmentStream(stream, 600):
        vector = [userLabel]
        for ind in FEATURE_INDICES:
          features = viz.collectFeatures(session, viz.FEATURES[ind])
          # NOTE: a previous check that dropped the whole session when a
          # feature had fewer than two samples was commented out; all
          # sessions are emitted, so statisticFn must accept sparse input.
          vector.extend(statisticFn(features))
        yield vector
def _saveHistogram(values, bins, path):
  """Clear the current pyplot figure, plot a histogram of values, save to path."""
  plt.clf()
  plt.hist(values, bins)
  plt.savefig(path)


def main(args):
  """Write histogram images of keystroke timing data per user and site.

  The input tree is read as ``args.dir/<user>/<site>``.  For each site the
  recording is opened with ``util.openStream`` and filtered with
  ``util.filterKeystrokes``; PNG histograms are then saved under
  ``args.odir/<user>/`` with the site name embedded in the file name.  A
  histogram is only written when there is data for it.

  Args:
    args: Object exposing ``dir`` (input root directory) and ``odir``
      (output root directory, created if missing).
  """
  createDirectoryIfNotExist(args.odir)

  userList = os.listdir(args.dir)
  for user in userList:
    createDirectoryIfNotExist(args.odir + '/' + user)
    sites = os.listdir(args.dir + '/' + user)
    for site in sites:
      relName = user + '/' + site
      # A per-site directory is created, although every image below is saved
      # directly in the user's directory.
      createDirectoryIfNotExist(args.odir + '/' + relName)
      # NOTE(review): stream is passed to many helpers below; this assumes it
      # can be iterated more than once (i.e. it is not a one-shot generator)
      # -- confirm against util.filterKeystrokes.
      stream = util.filterKeystrokes(util.openStream(args.dir + '/' + relName))
      sessions = util.segmentStream(stream)

      # All output files share this prefix and suffix.
      prefix = args.odir + '/' + user + '/'
      suffix = '-' + site + '.png'

      # We want histograms of keystroke usage per user
      allLengths = list(util.getAllKeystrokeLengths(stream))
      if allLengths:
        _saveHistogram(allLengths, 200, prefix + 'all-kl' + suffix)

      for i, combo in enumerate(keyCombos):
        lengths = list(util.getKeystrokeLengths(stream, combo))
        if lengths:
          _saveHistogram(lengths, 200, prefix + names[i] + '-kl' + suffix)

      # Also want histograms of word data, per user.  zip() is materialised
      # because on Python 3 it returns an iterator, which supports neither
      # len() nor indexing.
      wordData = list(zip(*chain(*[util.getWordData(s) for s in sessions])))
      if wordData and len(wordData[0]) > 0:
        # Only values with magnitude below 10 are plotted for the first column.
        _saveHistogram([d for d in wordData[0] if abs(d) < 10], 200,
                       prefix + 'word-dur' + suffix)
        _saveHistogram(list(wordData[1]), 200, prefix + 'word-len' + suffix)

      # Key overlaps
      keyOverlaps = [x for x in util.getKeyOverlaps(stream) if abs(x) < 5]
      if keyOverlaps:
        _saveHistogram(keyOverlaps, 100, prefix + 'overlap' + suffix)

      # Word pauses
      wordPauses = [x for x in util.getWordPauses(stream) if abs(x) < 60]
      if wordPauses:
        _saveHistogram(wordPauses, 100, prefix + 'word-pause' + suffix)

      # Time between shift key and modified key
      shiftTime = [x for x in util.getModifierDelays(stream, util._SHIFT) if abs(x) < 5]
      if shiftTime:
        _saveHistogram(shiftTime, 100, prefix + 'shift-delay' + suffix)

      # Time between shift-to-shift
      # NOTE(review): this calls the same helper with the same arguments as
      # the shift-delay histogram above and differs only in the cut-off
      # (1200 instead of 5); verify that a different helper was not intended.
      shiftShift = [x for x in util.getModifierDelays(stream, util._SHIFT) if abs(x) < 1200]
      if shiftShift:
        _saveHistogram(shiftShift, 100, prefix + 'shift-shift' + suffix)