import pickle

import nltk

# Project-local names used below (Microblog, FeatureExtractor, DataFilter,
# MicroblogForm, db, WORDS_FEATURES_PATH, etc.) are assumed to be imported
# from the project's own modules.


def test_feature():
    # Smoke test: extract features from a single sample microblog and save it.
    # The sample text roughly translates to "I don't like you, you ignorant
    # fool, there is something wrong with your head".
    microblogId = "234"
    microblog_text = "我不喜欢你, 你这个愚昧的家伙,脑子有毛病"
    polarity = 1
    feature_extractor = FeatureExtractor()
    words, taggings = feature_extractor.pos_tagging(microblog_text)
    posCount, negCount = feature_extractor.polarity_count(microblog_text)
    single_microblog = Microblog(microblogId=microblogId,
                                 text=microblog_text,
                                 polarity=polarity,
                                 negCount=negCount,
                                 posCount=posCount,
                                 words=words,
                                 taggings=taggings,
                                 microblogType=1,
                                 topic=None,
                                 sentiment=None)
    single_microblog.save()
def complete_microblog():
    microblogs = Microblog.objects(polarity='pos', microblogType='training')
    print(len(microblogs))
    new_microblogs = []
    for microblog in microblogs:
        newone = Microblog(microblogId=microblog.microblogId,
                           text=microblog.text,
                           polarity=microblog.polarity,
                           negCount=microblog.negCount,
                           posCount=microblog.posCount,
                           words=microblog.words,
                           taggings=microblog.taggings,
                           microblogType=microblog.microblogType,
                           topic='',
                           sentiment='')
        new_microblogs.append(newone)
    Microblog.objects.insert(new_microblogs)
def get_feature_set(microblogType):
    # Build (features, label) pairs for the requested microblog type.
    # The original query ignored microblogType; filter on it so the
    # parameter actually takes effect.
    microblogs = Microblog.objects(microblogType=microblogType)
    words_features = get_words_features_pickle()
    feature_sets = [(feature_filter(microblog.words, words_features), microblog.polarity)
                    for microblog in microblogs]
    return feature_sets
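# get_feature_set depends on two project helpers that are not shown in this
# section.  The definitions below are minimal, hypothetical sketches only,
# assuming the feature words are the pickled list written by
# pickle_words_features further down and that features are NLTK-style boolean
# "word is present" flags; the project's real implementations may differ.
def get_words_features_pickle():
    # Assumed loader: read the pickled word-feature list back from disk.
    with open(WORDS_FEATURES_PATH, 'rb') as input_file:
        return pickle.load(input_file)


def feature_filter(words, words_features):
    # Assumed feature builder: one boolean flag per candidate feature word.
    word_set = set(words)
    return {word: (word in word_set) for word in words_features}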
def classify_testing():
    microblogs = Microblog.objects(microblogType='testing')
    baseline_method(microblogs)
    latent_polarity_method(microblogs)
    test_set = get_feature_set('testing')
    for (name, input_path) in classifier_path_list:
        with open(input_path, 'rb') as input_classifier:
            classifier = pickle.load(input_classifier)
            save_testing_result(classifier, test_set, name)
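# classify_testing iterates over classifier_path_list, (name, pickle_path)
# pairs that are defined elsewhere in the project.  The list shape assumed
# below, and the training step that could produce such a pickle with NLTK,
# are illustrative sketches only; the real paths and classifier choices are
# not shown in this section.
classifier_path_list = [
    ('naive_bayes', 'classifiers/naive_bayes.pickle'),
]


def train_and_pickle_classifier():
    # Hypothetical training step: fit an NLTK Naive Bayes classifier on the
    # training feature set and pickle it to the path listed above.
    train_set = get_feature_set('training')
    classifier = nltk.NaiveBayesClassifier.train(train_set)
    with open('classifiers/naive_bayes.pickle', 'wb') as output_file:
        pickle.dump(classifier, output_file)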
from flask import current_app, flash, redirect, render_template, request, url_for
from flask_login import current_user


def new_microblog():
    # Flask view: publish a new microblog, then show the current user's feed.
    form = MicroblogForm()
    if form.validate_on_submit():
        microblog = Microblog(body=form.microblog.data, author=current_user)
        db.session.add(microblog)
        db.session.commit()
        flash('Your microblog is now live!')
        return redirect(url_for('main.new_microblog'))
    page = request.args.get('page', 1, type=int)
    pagination = current_user.followed_microblogs().paginate(
        page, current_app.config['POSTS_PER_PAGE'])
    microblogs = pagination.items
    return render_template('microblog/new_microblog.html',
                           form=form,
                           microblogs=microblogs,
                           pagination=pagination)
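# new_microblog builds a MicroblogForm that lives in the project's forms
# module.  The class below is only a minimal Flask-WTF sketch of such a form:
# the `microblog` field name is taken from the view above, while the label,
# validators, and submit field are assumptions.
from flask_wtf import FlaskForm
from wtforms import SubmitField, TextAreaField
from wtforms.validators import DataRequired


class MicroblogFormSketch(FlaskForm):
    microblog = TextAreaField('Say something', validators=[DataRequired()])
    submit = SubmitField('Submit')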
def pickle_words_features(microblogType):
    # Pickle the induced patterns and the word-feature vocabulary for the
    # given microblog type.  The original query ignored microblogType and
    # sliced FreqDist.keys(), which in NLTK 3 is not frequency-ordered;
    # filter on the type and take the 3000 most common words instead.
    microblogs = Microblog.objects(microblogType=microblogType)
    # pickle pattern
    pattern_induct(microblogs)
    all_words = []
    for microblog in microblogs:
        all_words.extend(microblog.words)
    all_words = nltk.FreqDist(all_words)
    words_features = [word for word, count in all_words.most_common(3000)]
    with open(WORDS_FEATURES_PATH, 'wb') as output_file:
        pickle.dump(words_features, output_file)
def microblog_data_handler(microblog_type):
    # Read microblogs of the given type from the API dump, extract features,
    # and return unsaved Microblog documents.
    data_filter = DataFilter()
    microblogs = data_filter.read_and_filter_api_microblog_data(microblog_type)
    result = []
    feature_extractor = FeatureExtractor()
    for polarity in microblogs:
        microblog_list = microblogs[polarity]
        for microblog in microblog_list:
            # feature extraction; each entry is (id, raw text, cleaned text, polarity)
            microblogId, microblog_raw_text, microblog_text, polarity = \
                microblog[0], microblog[1], microblog[2], microblog[3]
            posCount, negCount = feature_extractor.polarity_count(microblog_text)
            words, taggings = feature_extractor.pos_tagging(microblog_text)
            raw_words, raw_taggings = feature_extractor.pos_tagging(microblog_raw_text)
            single_microblog = Microblog(microblogId=microblogId,
                                         text=microblog_text,
                                         polarity=polarity,
                                         microblogType=microblog_type,
                                         topic='',
                                         posCount=posCount,
                                         negCount=negCount,
                                         words=words,
                                         taggings=taggings,
                                         raw_words=raw_words,
                                         raw_taggings=raw_taggings,
                                         sentiment='')
            result.append(single_microblog)
    return result
def delete_microblog_by_id(microblog_id):
    microblog = Microblog.objects(microblogId=microblog_id).first()
    if microblog:
        microblog.delete()
def delete_all_microblogs_by_type(microblog_type):
    microblogs = Microblog.objects(microblogType=microblog_type)
    if microblogs:
        microblogs.delete()
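# A possible end-to-end ordering of the helpers above, assuming they live in
# one module and the MongoDB connection has already been configured elsewhere;
# the project's actual entry point may differ.
if __name__ == '__main__':
    for microblog_type in ('training', 'testing'):
        for single_microblog in microblog_data_handler(microblog_type):
            single_microblog.save()
    pickle_words_features('training')
    classify_testing()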