def get_dbank_text_features(datum):
    """Collect every text feature extracted for one DementiaBank datum.

    The dict returned by ``pos_phrases.get_all`` is used as the accumulator
    and is updated in place with the output of each subsequent extractor,
    so on a key collision the later extractor's value wins.

    Args:
        datum: The sample handed unchanged to every extractor (its exact
            structure is defined by those extractors, not here).

    Returns:
        The merged feature dict.
    """
    features = pos_phrases.get_all(datum)

    syntactic = pos_syntactic.get_all(datum)
    features.update(syntactic)

    info_units = psycholinguistic.get_cookie_theft_info_unit_features(datum)
    features.update(info_units)

    psycho = psycholinguistic.get_psycholinguistic_features(datum)
    features.update(psycho)

    # Spatial features are computed once per picture partitioning scheme,
    # in this fixed order.
    for partition in ('halves', 'strips', 'quadrants'):
        features.update(
            psycholinguistic.get_spatial_features(datum, partition))

    return features
def process_blog(blog, name):
    """Extract a feature dict for every non-empty post of a blog.

    NOTE(review): a second ``process_blog`` (with a progress counter) is
    defined later in this source; if both live in the same module the later
    definition shadows this one.

    Args:
        blog: Mapping of post id -> post. Falsy posts (``None``, empty) are
            skipped. ``len(post)`` is stored as ``number_of_sentences``, so a
            post is presumably a sequence of sentences -- confirm with caller.
        name: Blog name, stored under the ``'blog'`` key of every result.

    Returns:
        A list with one feature dict per processed post, in the iteration
        order of ``blog``.
    """
    posts = []
    for post_id in blog:
        post = blog[post_id]
        if not post:
            # Nothing to extract from an empty or missing post.
            continue

        # Parenthesised single-argument form: prints the same text under the
        # Python 2 print statement and is also valid Python 3.
        print('Processing %s' % post_id)

        feat_dict = pos_phrases.get_all(post)
        feat_dict.update(pos_syntactic.get_all(post))
        feat_dict.update(
            psycholinguistic.get_psycholinguistic_features(post))

        # Bookkeeping fields are written last so they override any extractor
        # output that happens to use the same key.
        feat_dict["number_of_sentences"] = len(post)
        feat_dict['blog'] = name
        feat_dict['id'] = post_id

        posts.append(feat_dict)
    return posts
def process_blog(blog, name):
    """Extract a feature dict for every non-empty post of a blog.

    Prints a progress line (``current / total``) for each post that is
    processed. The counter is the post's position in ``blog``, so skipped
    (falsy) posts still advance it.

    NOTE(review): an earlier ``process_blog`` without the progress counter
    also appears in this source; if both live in the same module this later
    definition is the one that takes effect.

    Args:
        blog: Mapping of post id -> post. Falsy posts (``None``, empty) are
            skipped. ``len(post)`` is stored as ``number_of_sentences``, so a
            post is presumably a sequence of sentences -- confirm with caller.
        name: Blog name, stored under the ``'blog'`` key of every result.

    Returns:
        A list with one feature dict per processed post, in the iteration
        order of ``blog``.
    """
    posts = []
    total = len(blog)
    for i, post_id in enumerate(blog):
        post = blog[post_id]
        if not post:
            # Nothing to extract from an empty or missing post.
            continue

        # Parenthesised single-argument form: prints the same text under the
        # Python 2 print statement and is also valid Python 3.
        print('Processing %s (%s / %s)' % (post_id, i + 1, total))

        feat_dict = pos_phrases.get_all(post)
        feat_dict.update(pos_syntactic.get_all(post))
        feat_dict.update(
            psycholinguistic.get_psycholinguistic_features(post))

        # Bookkeeping fields are written last so they override any extractor
        # output that happens to use the same key.
        feat_dict["number_of_sentences"] = len(post)
        feat_dict['blog'] = name
        feat_dict['id'] = post_id

        posts.append(feat_dict)
    return posts
def get_blog_text_features(datum):
    """Collect the text features extracted for one blog datum.

    The dict returned by ``pos_phrases.get_all`` is used as the accumulator
    and is updated in place with the output of the remaining extractors, so
    on a key collision the later extractor's value wins.

    Args:
        datum: The sample handed unchanged to every extractor (its exact
            structure is defined by those extractors, not here).

    Returns:
        The merged feature dict.
    """
    features = pos_phrases.get_all(datum)

    syntactic = pos_syntactic.get_all(datum)
    features.update(syntactic)

    psycho = psycholinguistic.get_psycholinguistic_features(datum)
    features.update(psycho)

    return features