def run(self):
    """Dump the thresholded seed list to a tab-separated text file.

    Reads the seed file named by ``self.pass_in`` through
    ``dp_seeds.fromCSVThresholdedEng``, passing the two thresholds taken
    from ``self.param``, and writes one line per seed to the file named by
    ``self.pass_out`` in the form ``<user_id>\\t<age_group>\\n``.

    The output file is opened in ``'w'`` mode, so any existing content is
    overwritten.
    """
    seed_filename = self.pass_in
    output_filename = self.pass_out
    status_count_threshold = self.param['status_count_threshold']
    following_count_threshold = self.param['following_count_threshold']

    # Load the seeds before touching the output file, as the original did.
    seeds = dp_seeds.fromCSVThresholdedEng(
        seed_filename,
        status_count_threshold,
        following_count_threshold,
    )

    # The context manager closes the file even if iterating the seeds or a
    # write raises; the previous explicit close() was skipped on error.
    with open(output_filename, 'w') as fout:
        for user_id, age_group in seeds:
            fout.write(str(user_id) + '\t' + str(age_group) + '\n')
from dpFeature import Feature
import dpAnnotatedSeeds as dp_seeds
from dpDataLoader import loadFollowingFromDBofUser


class FollowingFeature(Feature):
    """Feature generator built on the result of ``loadFollowingFromDBofUser``.

    Each item returned by the loader for a user becomes one feature with a
    constant weight of 1.
    """

    def convertSingle(self, user_id):
        """Return the features for a single user as ``(friend, 1)`` pairs.

        ``friend`` is each item yielded by ``loadFollowingFromDBofUser(user_id)``
        (presumably the accounts this user follows -- confirm against
        dpDataLoader). The result is a real list on both Python 2 and
        Python 3, so callers may iterate it more than once or take its length.
        """
        friends = loadFollowingFromDBofUser(user_id)
        # Attach the feature weight: every followed entry counts as 1.
        return [(friend, 1) for friend in friends]


if __name__ == "__main__":
    # Script entry point: build the following-based feature file for the
    # annotated seed users.
    seed_file = '../data/sampled2.8-annotated.csv'
    seeds = dp_seeds.fromCSVThresholdedEng(seed_file)
    feature_gen = FollowingFeature(seeds)
    fout_name = '../feature_set2/ver2.8-efollowing.libsvm'
    feature_gen.generateTo(fout_name)