def post(self):
    """Create a new tweet from the API payload.

    Expects ``text`` (the tweet body) and ``user`` (a user *name*) in
    ``api.payload``.

    Returns:
        The created tweet and HTTP 201 on success.
    Aborts:
        422 if ``text`` is empty, 404 if no user with that name exists.
    """
    text = api.payload["text"]
    user = api.payload["user"]

    # Validate input before touching the database.
    if not text:
        abort(422, "Tweet text can't be empty")

    # Resolve the author by name. The original code dereferenced
    # `user.id` without checking for None, which raised AttributeError
    # (HTTP 500) for unknown users; report 404 explicitly instead.
    author = db.session.query(User).filter_by(name=user).first()
    if author is None:
        abort(404, "User not found")

    tweet = Tweet()
    tweet.text = text
    tweet.user_id = author.id
    db.session.add(tweet)
    db.session.commit()
    return tweet, 201
def create_tweet(user=None):
    """Create a tweet owned by *user* from the request's JSON body.

    Args:
        user: the authenticated user object; must expose ``.id``.
    Returns:
        The tweet serialized via ``tweet_schema``, or ``('', 400)`` when
        the JSON body is missing/invalid or lacks a required field.
    Aborts:
        401 when no user is supplied.
    """
    if not user:
        abort(401)

    requested_object = request.get_json()
    if requested_object is None:
        # get_json() returns None when the body is absent or not JSON
        # (e.g. wrong Content-Type). The original code then raised
        # TypeError on subscripting — the `except KeyError` below did not
        # catch it. Fail with the same 400 contract instead.
        return '', 400

    tweet = Tweet()
    tweet.user_id = user.id
    try:
        # Copy the required fields; KeyError means a field is missing.
        for attr in ['text']:
            setattr(tweet, attr, requested_object[attr])
    except KeyError:
        return '', 400

    db.session.add(tweet)
    db.session.commit()
    return tweet_schema.jsonify(tweet)
def result():
    """Run the full tweet-to-story pipeline and render the stories page.

    On a valid form submit this crawls tweets around a place, then runs the
    stages in order, persisting each stage's output keyed by ``crawler.id``:
    crawl -> preprocess (normalize/tokenize/spell-correct) -> POS tagging ->
    content/function class determination -> LDA topic extraction -> grammar
    story generation. Finally renders "stories.html" with all crawlers.

    NOTE(review): this function is very long and mixes many concerns; it is
    documented in place rather than restructured to avoid behavior changes.
    """
    # Restore the previously selected attractions (if any) into the form.
    selectedChoices = ChoiceObj('attractions', session.get('selected'))
    form_splace = SearchPlaceForm()
    form_stweets = SearchTweetsForm(obj=selectedChoices)

    if form_stweets.validate_on_submit():
        # Persist the selection and unpack the search parameters.
        session['selected'] = form_stweets.multi_attractions.data
        place_name = form_stweets.place.data
        latitude = form_stweets.latitude.data
        longitude = form_stweets.longitude.data
        attractions = session.get('selected')
        range_dist = form_stweets.range_dist.data
        days_before = form_stweets.days_before.data

        # --- CRAWLING ---------------------------------------------------
        twitter_crawler = TwitterCrawler(current_app)
        df_attractions = twitter_crawler.fetch_tweets_from_attractions(
            attractions, int(days_before), float(latitude), float(longitude),
            int(range_dist), place_name)

        # Too little data to analyze: show a notification page instead.
        if len(df_attractions) < 20:
            return render_template('notification.html')

        # Record this crawl run; its id links every downstream table row.
        crawler = Crawler()
        crawler.timestamp = datetime.now(pytz.timezone('Asia/Jakarta'))
        db.session.add(crawler)
        db.session.commit()

        # Store the searched attractions as one lowercase CSV string.
        attractions_lower = [x.lower() for x in attractions]
        att = Attractions()
        att.attractions = ','.join(attractions_lower)
        att.crawler_id = crawler.id
        db.session.add(att)
        db.session.commit()

        # Persist every crawled tweet (one commit per row).
        for _, row in df_attractions.iterrows():
            tweet = Tweet()
            tweet.user_id = row['user_id']
            tweet.username = row['username']
            tweet.created = row['created_at']
            tweet.text = row['text']
            tweet.latitude = row['latitude']
            tweet.longitude = row['longitude']
            tweet.crawler_id = crawler.id
            db.session.add(tweet)
            db.session.commit()

        # --- PREPROCESSING ----------------------------------------------
        tweets = Tweet.query.filter_by(crawler_id=crawler.id)
        attractions = Attractions.query.filter_by(crawler_id=crawler.id)

        # Flatten the stored CSV attraction strings back into a list.
        list_attractions = []
        for a in attractions:
            list_attractions.append(a.attractions)
        list_attractions = ''.join(list_attractions).split(',')

        # Pair each tweet id with its raw text.
        list_tweets = []
        for t in tweets:
            id_tweet = [t.id, t.text]
            list_tweets.append(id_tweet)

        # Set up the normalizer, tokenizer, and spell-checker model.
        # NOTE(review): "..\data" is a Windows-style path (and relies on
        # Python keeping the unknown escape \d literal) — breaks on POSIX;
        # should be os.path.join(SITE_ROOT, "..", "data", ...).
        normalizer = Normalize()
        tokenizer = Tokenize()
        symspell = SymSpell(max_dictionary_edit_distance=3)
        SITE_ROOT = os.path.abspath(os.path.dirname(__file__))
        json_url = os.path.join(SITE_ROOT, "..\data", "corpus_complete_model.json")
        symspell.load_complete_model_from_json(json_url, encoding="ISO-8859-1")

        # Normalize, tokenize and spell-correct every tweet text.
        result = []
        for tweet in list_tweets:
            # NOTE(review): `id` shadows the builtin of the same name.
            id, text = tweet[0], tweet[1]
            # Normalization chain: strip non-ascii/unicode, RT/FAV marks,
            # case, newlines, URLs, emoticons, hashtags/mentions, punctuation.
            tweet_norm = normalizer.remove_ascii_unicode(text)
            tweet_norm = normalizer.remove_rt_fav(tweet_norm)
            tweet_norm = normalizer.lower_text(tweet_norm)
            tweet_norm = normalizer.remove_newline(tweet_norm)
            tweet_norm = normalizer.remove_url(tweet_norm)
            tweet_norm = normalizer.remove_emoticon(tweet_norm)
            tweet_norm = normalizer.remove_hashtag_mention(tweet_norm)
            tweet_norm = normalizer.remove_punctuation(tweet_norm)
            # Tokenize the normalized text.
            tweet_tok = tokenizer.WordTokenize(tweet_norm, removepunct=True)
            # Spell-correct token by token; keep the original token when
            # SymSpell offers no suggestion.
            temp = []
            for token in tweet_tok:
                suggestion = symspell.lookup(phrase=token, verbosity=1,
                                             max_edit_distance=3)
                if len(suggestion) > 0:
                    # Suggestions stringify as "word:count"; keep the word.
                    get_suggestion = str(suggestion[0]).split(':')[0]
                    temp.append(get_suggestion)
                else:
                    temp.append(token)
            tweet_prepared = ' '.join(temp)
            # Re-join multi-word attraction names into single tokens.
            tweet_prepared = normalizer.join_attraction(
                tweet_prepared, list_attractions)
            id_tweet_prepared = [id, tweet_prepared]
            result.append(id_tweet_prepared)

        # Persist the preprocessed texts.
        for res in result:
            id, text = res[0], res[1]
            tb_preprocess = Preprocess()
            tb_preprocess.text = text
            tb_preprocess.tweet_id = id
            tb_preprocess.crawler_id = crawler.id
            db.session.add(tb_preprocess)
            db.session.commit()

        # --- POS TAGGING ------------------------------------------------
        tweets_preprocessed = Preprocess.query.filter_by(crawler_id=crawler.id)

        # Collect (tweet_id, preprocessed text) pairs.
        list_tweets = []
        for t in tweets_preprocessed:
            tid_tweet = [t.tweet_id, t.text]
            list_tweets.append(tid_tweet)

        # Tagger training data paths (same Windows-path caveat as above).
        SITE_ROOT = os.path.abspath(os.path.dirname(__file__))
        lexicon_url = os.path.join(SITE_ROOT, "..\data", "Lexicon.trn")
        ngram_url = os.path.join(SITE_ROOT, "..\data", "Ngram.trn")

        # Initialize the Indonesian POS tagger and its tokenizer.
        tagger = MainTagger(lexicon_url, ngram_url, 0, 3, 3, 0, 0, False, 0.2, 0, 500.0, 1)
        tokenize = Tokenization()

        # Tag every non-empty tweet; empty texts pass through unchanged.
        result = []
        for tweet in list_tweets:
            tweet_id, text = tweet[0], tweet[1]
            if len(text) == 0:
                tid_text = [tweet_id, text]
                result.append(tid_text)
            else:
                # Single-word texts get a filler word ("ini") appended —
                # presumably because the tagger needs >= 2 tokens; confirm.
                if len(text.split(' ')) == 1:
                    text = text + ' ini'
                out = tokenize.sentence_extraction(tokenize.cleaning(text))
                join_word = []
                for o in out:
                    strtag = " ".join(tokenize.tokenisasi_kalimat(o)).strip()
                    join_word += [" ".join(tagger.taggingStr(strtag))]
                tid_text = [tweet_id, join_word]
                result.append(tid_text)

        # Persist the tagged texts.
        for tweet in result:
            tweet_id, text = tweet[0], tweet[1]
            tweet_str = ''.join(text)
            tb_postag = PosTag()
            tb_postag.text = tweet_str
            tb_postag.tweet_id = tweet_id
            tb_postag.crawler_id = crawler.id
            db.session.add(tb_postag)
            db.session.commit()

        # --- CLASS DETERMINATION ("penentuan kelas") --------------------
        # Ccon: content-word tags; Cfunc: function-word/punctuation tags.
        Ccon = ['JJ', 'NN', 'NNP', 'NNG', 'VBI', 'VBT']
        Cfunc = [
            'OP', 'CP', 'GM', ';', ':', '"', '.', ',', '-', '...', 'RB', 'IN',
            'MD', 'CC', 'SC', 'DT', 'UH', 'CDO', 'CDC', 'CDP', 'CDI', 'PRP',
            'WP', 'PRN', 'PRL', 'NEG', 'SYM', 'RP', 'FW'
        ]
        tweets_tagged = PosTag.query.filter_by(crawler_id=crawler.id)

        # Collect (tweet_id, tagged text) pairs.
        list_tweets = []
        for t in tweets_tagged:
            tid_tweet = [t.tweet_id, t.text]
            list_tweets.append(tid_tweet)

        # Split each tagged text ("word/TAG word/TAG ...") into content
        # vs. function word buckets.
        result = []
        for tweet in list_tweets:
            tweet_id, text = tweet[0], tweet[1]
            if len(text) > 0:
                text_split = text.split(' ')
                doc_complete = {"con": [], "func": []}
                con = []
                func = []
                for word in text_split:
                    w = word.split('/', 1)[0]
                    tag = word.split('/', 1)[1]
                    if tag in Ccon:
                        con.append(word)
                    elif tag in Cfunc:
                        func.append(word)
                doc_complete["con"].append(' '.join(con))
                doc_complete["func"].append(' '.join(func))
            else:
                # NOTE(review): `doc_complete` is only assigned in the
                # `if` branch above — if the first tweet has empty text
                # this raises NameError, and otherwise the previous
                # iteration's dict is mutated. Likely a latent bug.
                doc_complete["con"].append(text)
                doc_complete["func"].append(text)
            result.append([tweet_id, doc_complete])

        # Persist the content/function split.
        for tweet in result:
            tweet_id, text = tweet[0], tweet[1]
            content, function = ''.join(text["con"]), ''.join(text["func"])
            tb_penentuan_kelas = PenentuanKelas()
            tb_penentuan_kelas.content = content
            tb_penentuan_kelas.function = function
            tb_penentuan_kelas.tweet_id = tweet_id
            tb_penentuan_kelas.crawler_id = crawler.id
            db.session.add(tb_penentuan_kelas)
            db.session.commit()

        # --- LDA TOPIC MODELING -----------------------------------------
        tweets_penentuan_kelas = PenentuanKelas.query.filter_by(
            crawler_id=crawler.id)

        # Gather the content-word strings only.
        tweets_content_tagged = []
        for tweet in tweets_penentuan_kelas:
            tweets_content_tagged.append(tweet.content)

        # Strip the "/TAG" suffix, leaving bare words per document.
        documents = []
        for tweet in tweets_content_tagged:
            tweet_split = tweet.split(' ')
            temp = []
            for word in tweet_split:
                w = word.split("/", 1)[0]
                temp.append(w)
            documents.append(temp)

        # Run LDA (4 topics, alpha=beta=0.001, 1000 iterations —
        # presumably; parameter names not visible here, confirm in LdaModel).
        lda = LdaModel(documents, int(4), float(0.001), float(0.001), int(1000))
        result = lda.get_topic_word_pwz(tweets_content_tagged)

        # Persist (topic, word, P(w|z)) triples.
        for r in result:
            topic, word, pwz = r[0], r[1], r[2]
            tb_ldapwz = LdaPWZ()
            tb_ldapwz.topic = topic
            tb_ldapwz.word = word
            tb_ldapwz.pwz = pwz
            tb_ldapwz.crawler_id = crawler.id
            db.session.add(tb_ldapwz)
            db.session.commit()

        # --- GRAMMAR STORY GENERATION -----------------------------------
        ldapwz = LdaPWZ.query.filter_by(crawler_id=crawler.id)

        # Group [word, pwz] pairs per topic.
        dict_ldapwz = defaultdict(list)
        for data in ldapwz:
            dict_ldapwz[data.topic].append([data.word, data.pwz])

        # Two CFG-based generators: "informasi" (informational) and
        # "cerita" (narrative) stories.
        cfg_informasi = CFG_Informasi()
        cfg_cerita = CFG_Cerita()

        # Generate sentences per topic from the topic-word data.
        dict_story_informasi = cfg_informasi.create_sentences_from_data(
            dict(dict_ldapwz))
        dict_story_cerita = cfg_cerita.create_sentences_from_data(
            dict(dict_ldapwz))

        # Merge per topic: index 0 = informasi story, index 1 = cerita story.
        dict_story = defaultdict(list)
        for d in (dict_story_informasi, dict_story_cerita):
            for key, value in d.items():
                dict_story[key].append('. '.join(i.capitalize() for i in value))

        # Persist both story variants for every topic.
        for topic, stories in dict_story.items():
            # informational story
            tb_grammar_story = GrammarStory()
            tb_grammar_story.topic = topic
            tb_grammar_story.rules = 'informasi'
            tb_grammar_story.story = stories[0]
            tb_grammar_story.crawler_id = crawler.id
            db.session.add(tb_grammar_story)
            db.session.commit()
            # narrative story
            tb_grammar_story = GrammarStory()
            tb_grammar_story.topic = topic
            tb_grammar_story.rules = 'cerita'
            tb_grammar_story.story = stories[1]
            tb_grammar_story.crawler_id = crawler.id
            db.session.add(tb_grammar_story)
            db.session.commit()

    # Always render the stories page listing all crawls, newest first.
    c = Crawler.query.order_by(Crawler.id.desc()).all()
    return render_template("stories.html", crawler=c, form_stweets=form_stweets)