def build_key2data(keywords, names):
    # Build a mapping of person name -> {feature key: data}, covering every feature type.
    filters = [
        featureFilter.Filter(long_time=duration.LONG_TIME, keywords=keywords),
        featureFilter.Filter(long_time=digraph.LONG_TIME, keywords=keywords),
        featureFilter.Filter(long_time=trigraph.LONG_TIME, keywords=keywords),
        featureFilter.Filter(long_time=digraphRatio.LONG_TIME, keywords=keywords),
    ]
    feature_types = featureGenerator.ALL_TYPES
    people = dict()
    for name in names:
        print("Building " + name)
        path = os.path.join(main_directory, name)
        events, _ = parseFile.get_events(path, filters[0], is_uniform=False, with_sample=False)
        people[name] = dict()
        for filter, feature_type in zip(filters, feature_types):
            keys2data = featureGenerator.create_data(events, filter, feature_type)
            people[name].update(keys2data)  # {key: data for key, data in keys2data.items() if key in chosen_keys}
    return people
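# Hypothetical usage sketch (the name and keywords below are placeholders, not taken from the
# data; assumes main_directory contains one subdirectory per person and that the featureFilter,
# featureGenerator, parseFile, duration, digraph, trigraph and digraphRatio modules are imported):
#   java_people = build_key2data(keywords=('java', 'Java'), names=['SomePerson'])
#   java_people['SomePerson']  # -> dict mapping each feature key to its data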
def generate_people(filters, feature_types, is_uniform=False):
    default_filter = filters[0]  # should all be identical
    people = []
    for name in os.listdir(main_directory):
        print("Building " + name)
        path = os.path.join(main_directory, name)
        if os.path.isdir(path):
            events, _ = parseFile.get_events(path, default_filter, is_uniform, with_sample=False)
            people.append(Person(name, events, filters, feature_types, is_uniform))
    return people
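# Hypothetical usage sketch (assumes `filters` is a list built as in build_key2data above and
# that Person is the project's class wrapping a person's events and feature types):
#   all_people = generate_people(filters, featureGenerator.ALL_TYPES, is_uniform=False)
#   all_people[0].name  # one Person per subdirectory of main_directory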
names = ['Gal Oz-Ari', 'Gil Boazi', 'Nir Yaron', 'Guy Levanon', 'Yonathan Schwammenthal',
         'Matan Levine', 'Ohad Ben-Or', 'Dor Aharonson', 'Yuval Itkin', 'Yonatan Caspi',
         'Noam Greenberg', 'Adi Asher', 'Yovel Rom']

# filter and get feature arrays for them
keywords = ('java', 'Java', 'Eclipse', 'IntelliJ', 'IDEA')
languages = [featureFilter.ENGLISH]
filters = [
    featureFilter.Filter(long_time=duration.LONG_TIME, keywords=keywords),
    featureFilter.Filter(long_time=digraph.LONG_TIME, keywords=keywords),
    featureFilter.Filter(long_time=trigraph.LONG_TIME, keywords=keywords),
    featureFilter.Filter(long_time=digraphRatio.LONG_TIME, keywords=keywords),
]
feature_types = featureGenerator.ALL_TYPES

people = dict()  # name: [time1, time2, time3, ...]
for name in names:
    print("Building " + name)
    path = os.path.join(main_directory, name)
    events, _ = parseFile.get_events(path, filters[0], is_uniform=False, with_sample=False)
    people[name] = dict()
    for filter, feature_type in zip(filters, feature_types):
        keys2data = featureGenerator.create_data(events, filter, feature_type)
        people[name].update(keys2data)  # {key: data for key, data in keys2data.items() if key in chosen_keys}

loc = r"F:\Clouds\Dropbox\SMOP\AnalysisCompressed\eclipse_english_people2key2data.p"
pickle.dump(people, open(loc, "wb+"))

# find best features (most common of each type)
def best_keys2(people, feature_type):
    # for people from featureGenerator.create_data
    keys = dict()
    for p in people.values():
        for k, li in p.items():
            if k[-1] == feature_type:
names = names3

# filter and get feature arrays for them
keywords = tuple()  # ('java', 'Java', 'Eclipse', 'IntelliJ', 'IDEA')
languages = featureFilter.ALL_LANGUAGES  # [featureFilter.ENGLISH]
filters = [
    featureFilter.Filter(long_time=duration.LONG_TIME, keywords=keywords, languages=languages),
    featureFilter.Filter(long_time=digraph.LONG_TIME, keywords=keywords, languages=languages),
    featureFilter.Filter(long_time=trigraph.LONG_TIME, keywords=keywords, languages=languages),
    featureFilter.Filter(long_time=digraphRatio.LONG_TIME, keywords=keywords, languages=languages),
]
feature_types = featureGenerator.ALL_TYPES

people = dict()  # name: [time1, time2, time3, ...]
for name in names:
    print("Building " + name)
    path = os.path.join(main_directory, name)
    events, _ = parseFile.get_events(path, filters[0], is_uniform=False, with_sample=False)
    people[name] = dict()
    for filter, feature_type in zip(filters, feature_types):
        keys2data = featureGenerator.create_data(events, filter, feature_type)
        people[name].update(keys2data)  # {key: data for key, data in keys2data.items() if key in chosen_keys}

loc = r"F:\Clouds\Dropbox\SMOP\AnalysisCompressed\general_people2key2data.p"  # r"F:\Clouds\Dropbox\SMOP\AnalysisCompressed\eclipse_english_people2key2data.p"
pickle.dump(people, open(loc, "wb+"))

# find best features (most common of each type)
def best_keys2(people, feature_type):
    # for people from featureGenerator.create_data
    keys = dict()
    for p in people.values():
        for k, li in p.items():