def build_key2data(keywords, names):
    """Build the merged feature data of every person listed in *names*.

    One ``featureFilter.Filter`` is created per timing module (duration,
    digraph, trigraph, digraphRatio), each with that module's ``LONG_TIME``
    and the given *keywords*. The filters are paired positionally with
    ``featureGenerator.ALL_TYPES``.

    For each name the events are read once from ``main_directory/<name>``
    using the first filter only, then ``featureGenerator.create_data`` is
    called for every (filter, feature type) pair and the results are merged
    into a single dict.

    Args:
        keywords: forwarded unchanged to every ``featureFilter.Filter``.
        names: iterable of person names (sub-directories of
            ``main_directory``).

    Returns:
        dict mapping each name to the merged dict returned by the
        ``create_data`` calls (later feature types overwrite equal keys).
    """
    timing_modules = (duration, digraph, trigraph, digraphRatio)
    filters = [
        featureFilter.Filter(long_time=module.LONG_TIME, keywords=keywords)
        for module in timing_modules
    ]
    feature_types = featureGenerator.ALL_TYPES

    people = {}
    for name in names:
        print("Building " + name)
        # Events are parsed a single time (with the first filter) and reused
        # for all feature types below.
        events, _ = parseFile.get_events(
            os.path.join(main_directory, name),
            filters[0],
            is_uniform=False,
            with_sample=False,
        )
        merged = {}
        people[name] = merged
        for feature_filter, feature_type in zip(filters, feature_types):
            merged.update(
                featureGenerator.create_data(events, feature_filter, feature_type)
            )
    return people
def best_people(keywords=None, languages=featureFilter.ALL_LANGUAGES):
    """Print a ranking of people by total feature count and return the people.

    One ``featureFilter.Filter`` is built per timing module (duration,
    digraph, trigraph, digraphRatio) with the given restriction, and
    ``featureComparer.generate_people`` is called with those filters and
    ``featureGenerator.ALL_TYPES``. For each person the ``count`` of all of
    their features is summed; ``(total, name)`` pairs are printed in
    descending order.

    Args:
        keywords: keyword restriction forwarded to every filter. Defaults to
            a fresh empty list on each call (``None`` sentinel) so that no
            list object is shared between calls or between the filters of
            different calls.
        languages: language restriction forwarded to every filter.

    Returns:
        The object returned by ``featureComparer.generate_people`` — note it
        is returned as-is, NOT sorted by the printed ranking.
    """
    if keywords is None:
        # Avoid the shared-mutable-default pitfall: the filters keep a
        # reference to this object beyond the lifetime of the call.
        keywords = []

    filters = [
        featureFilter.Filter(long_time=duration.LONG_TIME, keywords=keywords, languages=languages),
        featureFilter.Filter(long_time=digraph.LONG_TIME, keywords=keywords, languages=languages),
        featureFilter.Filter(long_time=trigraph.LONG_TIME, keywords=keywords, languages=languages),
        featureFilter.Filter(long_time=digraphRatio.LONG_TIME, keywords=keywords, languages=languages),
    ]
    feature_types = featureGenerator.ALL_TYPES
    people = featureComparer.generate_people(filters, feature_types, is_uniform=False)

    # (total feature count, name) pairs, largest total first.
    ranking = sorted(
        (
            (sum(feature.count for feature in p.features.values()), p.name)
            for p in people
        ),
        reverse=True,
    )
    for total, name in ranking:
        print(total, name)
    return people
def build_database(filename, keywords=None, languages=featureFilter.ALL_LANGUAGES, update=False):
    """Generate people and samples, optionally pickling both to disk.

    One ``featureFilter.Filter`` is built per timing module (duration,
    digraph, trigraph, digraphRatio) with the given restriction, then
    ``featureComparer.generate_people_samples`` is called with those filters
    and ``featureGenerator.ALL_TYPES``.

    Args:
        filename: suffix used in the pickle file names
            (``people_<filename>.p`` and ``samples_<filename>.p``). Only used
            when *update* is true.
        keywords: keyword restriction forwarded to every filter. Defaults to
            a fresh empty list per call (``None`` sentinel) instead of a
            shared mutable default.
        languages: language restriction forwarded to every filter.
        update: when true, both results are pickled into the hard-coded
            ``F:\\Clouds\\Dropbox\\SMOP\\AnalysisCompressed`` directory.

    Returns:
        The ``(people, samples)`` tuple from ``generate_people_samples``.
    """
    if keywords is None:
        keywords = []

    filters = [
        featureFilter.Filter(long_time=duration.LONG_TIME, keywords=keywords, languages=languages),
        featureFilter.Filter(long_time=digraph.LONG_TIME, keywords=keywords, languages=languages),
        featureFilter.Filter(long_time=trigraph.LONG_TIME, keywords=keywords, languages=languages),
        featureFilter.Filter(long_time=digraphRatio.LONG_TIME, keywords=keywords, languages=languages),
    ]
    feature_types = featureGenerator.ALL_TYPES
    people, samples = featureComparer.generate_people_samples(filters, feature_types, is_uniform=False)

    if update:
        import pickle

        # Context managers guarantee the files are flushed and closed even if
        # pickling fails part-way (the handles used to be left open).
        loc = r"F:\Clouds\Dropbox\SMOP\AnalysisCompressed\people_" + filename + ".p"
        with open(loc, "wb+") as out:
            pickle.dump(people, out)
        loc = r"F:\Clouds\Dropbox\SMOP\AnalysisCompressed\samples_" + filename + ".p"
        with open(loc, "wb+") as out:
            pickle.dump(samples, out)
    return people, samples
def load_all():
    """Load a ``Person`` for every sub-directory of ``main_directory``.

    A single filter (all languages of ``trigraphFilter``, ``stdev_factor=0``,
    no keywords, ``long_time=0.8``) is shared by all persons. Every directory
    entry is announced on stdout, including entries that turn out not to be
    directories and are therefore skipped.

    Returns:
        list of the created ``Person`` objects whose ``sessions`` attribute
        is truthy.
    """
    session_filter = featureFilter.Filter(
        languages=trigraphFilter.ALL_LANGUAGES,
        stdev_factor=0,
        keywords=[],
        long_time=0.8,
    )

    loaded = []
    for entry in os.listdir(main_directory):
        print("Building " + entry)
        entry_path = os.path.join(main_directory, entry)
        if not os.path.isdir(entry_path):
            continue
        loaded.append(Person(entry, entry_path, session_filter))

    return [person for person in loaded if person.sessions]
# Script section: build, for a chosen list of people, a dict that merges the
# data of every feature type (people[name] -> merged create_data() results).
import matplotlib.pyplot as plt
results_directory = parseFile.results_directory
main_directory = parseFile.main_directory

# Choose the names of the people to test.
# Three candidate lists are defined below; `names` selects the one in use.
#names = ['Gal Oz-Ari', 'Gil Boazi', 'Nir Yaron', 'Guy Levanon', 'Yonathan Schwammenthal', 'Matan Levine', 'Ohad Ben-Or', 'Dor Aharonson', 'Yuval Itkin', 'Yonatan Caspi', 'Noam Greenberg', 'Adi Asher', 'Yovel Rom']
names1 = ['Adi Asher', 'Alon Gal', 'Amitai Nevo', 'Dor Aharonson', 'Efi Sapir', 'Elad Kliger', 'Elisha Modelevsky', 'Gal Oz-Ari', 'Gallil Maimon', 'Gil Boazi', 'Gilad Samuels', 'Guy Levanon', 'Ido Zemach', 'Itay Efraim', 'Matan Levine', 'Matan Seri', 'Nir Yaron', 'Noam Greenberg', 'Ohad Ben-Or', 'Omer Deutsch', 'Or Johnson Ezra', 'Or Sagy', 'Shaked Rosenstein', 'Yonatan Caspi', 'Yonathan Schwammenthal', 'Yotam Sali', 'Yovel Rom', 'Yuval Itkin']
names2 = ['Adi Asher', 'Amitai Nevo', 'Dor Aharonson', 'Elad Kliger', 'Elisha Modelevsky', 'Gal Oz-Ari', 'Gallil Maimon', 'Gil Boazi', 'Gilad Samuels', 'Guy Levanon', 'Ido Zemach', 'Matan Levine', 'Nir Yaron', 'Noam Greenberg', 'Ohad Ben-Or', 'Omer Deutsch', 'Or Johnson Ezra', 'Or Sagy', 'Shaked Rosenstein', 'Yonatan Caspi', 'Yonathan Schwammenthal', 'Yovel Rom', 'Yuval Itkin']
names3 = ['Adi Asher', 'Dor Aharonson', 'Elad Kliger', 'Elisha Modelevsky', 'Gal Oz-Ari', 'Gallil Maimon', 'Gil Boazi', 'Gilad Samuels', 'Guy Levanon', 'Matan Levine', 'Nir Yaron', 'Noam Greenberg', 'Ohad Ben-Or', 'Or Johnson Ezra', 'Shaked Rosenstein', 'Yonathan Schwammenthal', 'Yovel Rom', 'Yuval Itkin']
names = names3

# Filter the events and get the feature arrays for the chosen people.
# The trailing comments keep the alternative (restricted) settings at hand.
keywords = tuple() #('java', 'Java', 'Eclipse', 'IntelliJ', 'IDEA')
languages = featureFilter.ALL_LANGUAGES #[featureFilter.ENGLISH]
# One filter per timing module; paired positionally with feature_types by
# the zip() in the loop below.
filters = [featureFilter.Filter(long_time=duration.LONG_TIME,keywords=keywords,languages=languages),
           featureFilter.Filter(long_time=digraph.LONG_TIME,keywords=keywords,languages=languages),
           featureFilter.Filter(long_time=trigraph.LONG_TIME,keywords=keywords,languages=languages),
           featureFilter.Filter(long_time=digraphRatio.LONG_TIME,keywords=keywords,languages=languages),
           ]
feature_types = featureGenerator.ALL_TYPES
people = dict() #name:[time1, time2, time3...]
for name in names:
    print("Building " + name)
    path = os.path.join(main_directory, name)
    # Events are parsed once, with the first filter only, and reused for
    # every feature type.
    events, _ = parseFile.get_events(path, filters[0], is_uniform=False, with_sample=False)
    people[name] = dict()
    # NOTE(review): `filter` shadows the builtin and stays bound after the
    # loop; later script code appears to read it, so it is left unrenamed.
    for filter, feature_type in zip(filters, feature_types):
        keys2data = featureGenerator.create_data(events,filter,feature_type)
        people[name].update( keys2data )#{key:data for key,data in keys2data.items() if key in chosen_keys})
results_path = os.path.join(main_directory, name, results_directory) if os.path.exists(results_path): events = parseFile.load_all_standard_sessions(results_path) keys = keys.union({nm for ev in events for nm in ev.names}) #keys = {'end', 'd', 'v', '7', 'right alt', 'home', 'esc', 'f5', '`', 'page down', '[', 'f11', 'right shift', 'b', 'm', 'p', 'q', '?', '3', 'c', '8', 'u', 'g', 'enter', '\\', '9', 's', 'down', 'f1', "'", 'j', 'f9', 'y', 'f3', '.', 'f', 'insert', '4', 'f2', 'delete', 'break', 'f4', 'k', 'application', 'i', ',', 'f10', 'page up', '?', 'z', '*', 'f8', 'a', '5', 'print screen', 'sys req', '2', 't', '0', 'l', 'left windows', '?', 'right ctrl', 'windows', 'tab', 'f12', 'r', 'alt', 'space', 'w', '/', 'n', 'scroll lock', '=', '6', 'e', '1', 'shift', 'right windows', 'f7', 'left', ']', ';', 'o', 'right', '<00>', '+', 'backspace', 'up', 'x', 'ctrl', 'f6', 'caps lock', '-', 'del', 'h'} #then i chose keys = {'d', 'v', 'right alt', '[', 'right shift', 'b', 'm', 'p', 'q', 'c', 'u', 'g', 'enter', '\\', 's', "'", 'j', 'y', '.', 'f', 'k', 'i', ',', 'z', '*', 'a', 't', 'l', 'right ctrl', 'tab', 'r', 'alt', 'space', 'w', '/', 'n', '=', 'e', 'shift', ']', ';', 'o', '+', 'x', 'ctrl', '-', 'h'} #estimate time distribution to find long_time import parseFile import featureFilter import featureGenerator import featureComparer import duration, digraph, trigraph, digraphRatio filter = featureFilter.Filter() filters = [filter] feature_types = [featureGenerator.TRIGRAPH_TYPE] people = featureComparer.generate_people(filters, feature_types, False) ts = [ f.mean + 2 * f.stdev for p in people for f in p.features.values() if f.count > 1 and f.mean > 0 ] #### def make_bar_chart(names, values): import numpy as np import matplotlib.pyplot as plt
person.features[key].count for person in people if key in person.features ]) keys.append((keyCount, key)) list_keys.append(sorted(keys, reverse=True)) for li in list_keys: print(li[:15]) chosen_keys = [('space', 0), ('e', 0), ('i', 'n', 1), ('f', 'i', 1), ('t', 'h', 'e', 2), ('i', 'n', 'g', 2), ('t', 'h', 'e', 3), ('i', 'n', 'g', 3)] #features distribution for two people filters = [ featureFilter.Filter(long_time=duration.LONG_TIME), featureFilter.Filter(long_time=digraph.LONG_TIME), featureFilter.Filter(long_time=trigraph.LONG_TIME), featureFilter.Filter(long_time=digraphRatio.LONG_TIME), ] feature_types = featureGenerator.ALL_TYPES names = ["Nir Yaron", "Guy Levanon", "Noam Greenberg"] people = dict() for name in names: print("Building " + name) path = os.path.join(main_directory, name) events, _ = parseFile.get_events(path, filter, is_uniform=False, with_sample=False) people[name] = dict()
fit = sorted(fit, reverse=True) idx = [n for v, n in fit].index(sample.name) print(sample.name, idx, fit[:5]) #choose the names of people i want to test names = [ 'Gal Oz-Ari', 'Gil Boazi', 'Nir Yaron', 'Guy Levanon', 'Yonathan Schwammenthal', 'Matan Levine', 'Ohad Ben-Or', 'Dor Aharonson', 'Yuval Itkin', 'Yonatan Caspi', 'Noam Greenberg', 'Adi Asher', 'Yovel Rom' ] #filter and get feature arrays for them keywords = ('java', 'Java', 'Eclipse', 'IntelliJ', 'IDEA') languages = [featureFilter.ENGLISH] filters = [ featureFilter.Filter(long_time=duration.LONG_TIME, keywords=keywords), featureFilter.Filter(long_time=digraph.LONG_TIME, keywords=keywords), featureFilter.Filter(long_time=trigraph.LONG_TIME, keywords=keywords), featureFilter.Filter(long_time=digraphRatio.LONG_TIME, keywords=keywords), ] feature_types = featureGenerator.ALL_TYPES people = dict() #name:[time1, time2, time3...] for name in names: print("Building " + name) path = os.path.join(main_directory, name) events, _ = parseFile.get_events(path, filters[0], is_uniform=False, with_sample=False) people[name] = dict() for filter, feature_type in zip(filters, feature_types):