import requests
import tweepy

import intervaltree
import sourcemarkers
import util


def next_page(url):
    # Follow Graph API pagination links recursively, saving each comment to CSV.
    try:
        page = requests.get(url).json()
        for c in page.get('data'):
            util.salva_csv('NULL', c.get('id'), util.tokens(c.get('message')),
                           c.get('created_time'), c.get('like_count'), 'NULL')
        paging = page.get('paging')
        if paging is not None and paging.get('next') is not None:
            next_page(paging.get('next'))
    except Exception:
        # Network failures and malformed responses are silently swallowed.
        pass
def start(id_post):
    ACCESS_TOKEN = "EAAE4pgrzWasBAFJH7Ct4ZCdBZApOAsAknEPcT9ucFNxxrhGhUBZBq5gA9idxT452kkRzgdQYoeZAPpoqqaXuLzICR3hbsDLgXV17PFY3vIHtXiZCpjB5qJTzsoWZAAbaNJHRl0b9bt1191IpAMBJgBZBhSG7X1GkeCPZBeyZCuJdSSAiMxXbWN4k7cAbuW4M4SHQZD"
    BASE_URL = "https://graph.facebook.com/v3.1/"
    url = (BASE_URL + id_post
           + "?fields=comments.limit(100){id,message,like_count,created_time}"
           + "&access_token=" + ACCESS_TOKEN)
    try:
        post = requests.get(url).json()
        comments = post.get('comments').get('data')
        for c in comments:
            util.salva_csv('NULL', c.get('id'), util.tokens(c.get('message')),
                           c.get('created_time'), c.get('like_count'), 'NULL')
        if post.get('comments').get('paging') is not None:
            next_page(post.get('comments').get('paging').get('next'))
    except Exception:
        pass
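# Usage sketch (an assumption, not from the original source): Graph API post IDs
# are typically of the form "{page_id}_{post_id}"; with a hypothetical ID the
# crawl would be started as:
#
#     start('123456789_987654321')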
def start():
    try:
        tweets = []
        for tweet in tweepy.Cursor(api.search,
                                   q='bolsonaro OR haddad OR nordeste OR nodestino OR marina OR ciro',
                                   tweet_mode="extended",
                                   lang="pt-br",
                                   since='2018-10-06',
                                   until='2018-10-08').items():
            if 'RT' not in tweet.full_text:
                # For retweets, keep the full text of the original tweet.
                if 'retweeted_status' in dir(tweet):
                    tweet.full_text = tweet.retweeted_status.full_text
                util.salva_csv(tweet.user.id, tweet.id, util.tokens(tweet.full_text),
                               tweet.created_at, tweet.user.location, tweet.user.name)
                print(tweet.created_at, tweet.full_text)
                tweets.append(tweet)
    except tweepy.error.TweepError as et:
        print(et)
    except Exception as e:
        print(e)
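# The module-level `api` handle used by start() is assumed to be configured
# elsewhere in the original project; a minimal sketch of how such a handle is
# usually built with tweepy (the four credential strings are placeholders):
auth = tweepy.OAuthHandler('CONSUMER_KEY', 'CONSUMER_SECRET')
auth.set_access_token('ACCESS_TOKEN', 'ACCESS_TOKEN_SECRET')
api = tweepy.API(auth, wait_on_rate_limit=True)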
def find_like_pattern_0(inputfile, pattern, ms):
    # Support ignored/accepted ranges already marked in the source document.
    marked = sourcemarkers.find_marked_intervals(inputfile.text)
    marked_tree = intervaltree.IntervalTree([
        intervaltree.Interval(b, e)  # include e here to simulate closed interval
        for b, e, t in marked
    ])

    # Tokenize both the document and the pattern.
    # IntervalType is provided elsewhere in the project.
    textintervals = [i for i in inputfile.lexintervals
                     if i.int_type == IntervalType.general]
    pattern_tokens = util.tokens(pattern)
    pattern_token_texts = [t[2] for t in pattern_tokens]

    inputfile_tokens = []
    for ti in textintervals:
        tit = inputfile.text[ti.offs:ti.end]
        titt = util.tokens(tit)
        ti_tokens = [(ti.offs + tittn[0], ti.offs + tittn[1], tittn[2])
                     for tittn in titt]
        inputfile_tokens += ti_tokens
    inputfile_token_texts = [t[2] for t in inputfile_tokens]

    def jt(token_texts):
        return ' '.join(token_texts)

    jp = jt(pattern_token_texts)

    # Search for the pattern: slide a window of len(pattern_tokens) tokens over
    # the document and keep every offset whose similarity ratio reaches ms.
    found = []
    for o in range(len(inputfile_tokens) - len(pattern_tokens)):
        tp = jt(inputfile_token_texts[o:o + len(pattern_tokens)])
        r = util.lratio(jp, tp)
        if r >= ms:
            found.append((o + 1, r))  # TODO: why does +1 work better here?..

    # Detect local peaks: hits whose ratio is not below either neighbour.
    peaks = []
    for findex in range(1, len(found) - 1):
        if found[findex - 1][1] <= found[findex][1] >= found[findex + 1][1]:
            peaks.append(found[findex])

    # Filter nearby intersecting peaks, keeping only the highest one.
    fpeaks = set(peaks)
    for i1 in range(len(peaks)):
        for i2 in range(len(peaks)):
            o1, r1 = peaks[i1]
            o2, r2 = peaks[i2]
            if (peaks[i1] in fpeaks and peaks[i2] in fpeaks
                    and abs(o1 - o2) < len(pattern_tokens) // 2 and r2 < r1):
                fpeaks.remove(peaks[i2])

    # Return results in terms of the source document.
    results = []
    for bo, clr in fpeaks:
        cb = inputfile_tokens[bo][0]
        ce = inputfile_tokens[bo + len(pattern_tokens) - 1][1]
        if not marked_tree.overlap(cb, ce):  # skip results overlapping already marked ranges
            cwords = inputfile_token_texts[bo:bo + len(pattern_tokens)]
            ctext = inputfile.text[cb:ce]
            results.append((cb, ce - 1, clr, ctext, cwords))
    return results
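# util.lratio is defined elsewhere in the project; find_like_pattern_0 only
# relies on it returning a similarity score in [0, 1] that is compared against
# the ms threshold. A stand-in with the same contract (an assumption for
# illustration, not the project's actual implementation):
import difflib

def lratio_standin(a, b):
    # SequenceMatcher.ratio() = 2*M/T, where M is the number of matching
    # characters and T the combined length of both strings; 1.0 means identical.
    return difflib.SequenceMatcher(None, a, b).ratio()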
def itokens(text):
    # Enumerate util.tokens(text), yielding (index, begin, end, string) tuples.
    return tuple(
        (n, b, e, s)
        for n, (b, e, s) in enumerate(util.tokens(text))
    )
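# Hypothetical example: assuming util.tokens('foo bar') returns position/text
# triples such as ((0, 3, 'foo'), (4, 7, 'bar')), the (begin, end, string)
# shape the unpacking above relies on, itokens adds a running index:
#
#     itokens('foo bar')
#     # -> ((0, 0, 3, 'foo'), (1, 4, 7, 'bar'))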