Example #1
def next_page(url):
    try:
        resp = requests.get(url).json()
        # Save every comment on this page of Graph API results to the CSV
        for c in resp.get('data'):
            util.salva_csv('NULL', c.get('id'), util.tokens(c.get('message')), c.get('created_time'), c.get('like_count'),
                           'NULL')

        # Follow the paging cursor recursively until there are no further pages
        if resp.get('paging') is not None:
            next_page(resp.get('paging').get('next'))
    except Exception:
        pass
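The recursive call above follows the Graph API paging cursor, so a very long comment thread could in principle exhaust Python's recursion limit. A minimal iterative sketch of the same idea, assuming the same response shape and the same requests/util imports as the snippets on this page (next_page_iterative is a hypothetical name, not part of the project):

def next_page_iterative(url):
    # Walk the paging cursors in a loop instead of recursing
    while url:
        resp = requests.get(url).json()
        for c in resp.get('data', []):
            util.salva_csv('NULL', c.get('id'), util.tokens(c.get('message')),
                           c.get('created_time'), c.get('like_count'), 'NULL')
        paging = resp.get('paging') or {}
        url = paging.get('next')  # None when there is no further page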
Example #2
def start(id_post):
    ACCESS_TOKEN = "EAAE4pgrzWasBAFJH7Ct4ZCdBZApOAsAknEPcT9ucFNxxrhGhUBZBq5gA9idxT452kkRzgdQYoeZAPpoqqaXuLzICR3hbsDLgXV17PFY3vIHtXiZCpjB5qJTzsoWZAAbaNJHRl0b9bt1191IpAMBJgBZBhSG7X1GkeCPZBeyZCuJdSSAiMxXbWN4k7cAbuW4M4SHQZD"
    BASE_URL = "https://graph.facebook.com/v3.1/"

    # Request the first 100 comments of the post, with the fields that are saved to the CSV
    url = BASE_URL + id_post + "?fields=comments.limit(100){id,message,like_count,created_time}&access_token=" + ACCESS_TOKEN
    try:
        post = requests.get(url).json()
        comments = post.get('comments').get('data')

        for c in comments:

            util.salva_csv('NULL', c.get('id'), util.tokens(c.get('message')), c.get('created_time'), c.get('like_count'),
                           'NULL')

        # Hand the paging cursor to next_page() to fetch the remaining comments
        if post.get('comments').get('paging') is not None:
            next_page(post.get('comments').get('paging').get('next'))
    except Exception:
        pass
Example #3
def start():
    try:
        tweets = []
        # Search tweets matching the query terms between the two dates
        for tweet in tweepy.Cursor(api.search, q='bolsonaro OR haddad OR nordeste OR nodestino OR marina OR ciro', tweet_mode="extended", lang="pt-br",
                                   since='2018-10-06', until='2018-10-08').items():
            # Crude retweet filter: keep only statuses whose text does not contain 'RT'
            if 'RT' not in tweet.full_text:
                # When the status wraps a retweet, use the original tweet's full text
                if 'retweeted_status' in dir(tweet):
                    tweet.full_text = tweet.retweeted_status.full_text
                # print(tweet.full_text)
                util.salva_csv(tweet.user.id, tweet.id, util.tokens(tweet.full_text), tweet.created_at, tweet.user.location, tweet.user.name)
                print(tweet.created_at, tweet.full_text)
                tweets.append(tweet)

    except tweepy.error.TweepError as et:
        print(et)
    except Exception as e:
        print(e)
Example #4
def find_like_pattern_0(inputfile, pattern, ms):
    # Support ignored/accepted ranges
    marked = sourcemarkers.find_marked_intervals(inputfile.text)
    marked_tree = intervaltree.IntervalTree([
        intervaltree.Interval(b, e)  # include e here to simulate closed interval
        for b, e, t in marked
    ])

    # Tokenize both document and pattern
    textintervals = [i for i in inputfile.lexintervals if i.int_type == IntervalType.general]

    pattern_tokens = util.tokens(pattern)
    pattern_token_texts = [t[2] for t in pattern_tokens]

    inputfile_tokens = []
    for ti in textintervals:
        tit = inputfile.text[ti.offs:ti.end]
        titt = util.tokens(tit)
        ti_tokens = [
            (ti.offs + tittn[0], ti.offs + tittn[1], tittn[2])
            for tittn in titt
        ]
        inputfile_tokens += ti_tokens

    inputfile_token_texts = [t[2] for t in inputfile_tokens]

    def jt(token_texts):
        return ' '.join(token_texts)
    jp = jt(pattern_token_texts)

    # -------------------------------
    # Search for pattern

    found = []

    # Compare the pattern against every window of the same token length
    for o in range(len(inputfile_tokens) - len(pattern_tokens)):
        tp = jt(inputfile_token_texts[o:o+len(pattern_tokens)])
        r = util.lratio(jp, tp)
        if r >= ms:
            found.append((o + 1, r))  # TODO: why +1 makes better there?..

    # detect peaks
    peaks = []
    for findex in range(1, len(found) - 1):
        if found[findex-1][1] <= found[findex][1] <= found[findex+1][1]:
            peaks.append(found[findex])

    # filter nearby intersecting peaks, only leave highest
    fpeaks = set(peaks)

    for i1 in range(len(peaks)):
        for i2 in range(len(peaks)):
            o1, r1 = peaks[i1]
            o2, r2 = peaks[i2]
            if peaks[i1] in fpeaks and peaks[i2] in fpeaks and abs(o1 - o2) < len(pattern_tokens) // 2 and r2 < r1:
                fpeaks.remove(peaks[i2])

    # -------------------------------
    # Return results in terms of source doc

    results = []
    for bo, clr in fpeaks:
        cb = inputfile_tokens[bo][0]
        ce = inputfile_tokens[bo + len(pattern_tokens) - 1][1]

        if not marked_tree.overlap(cb, ce):  # skip results intersecting with already marked
            cwords = inputfile_token_texts[bo:bo+len(pattern_tokens)]
            ctext = inputfile.text[cb:ce]
            results.append((cb, ce - 1, clr, ctext, cwords))

    return results
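The core of the search above is a sliding window: the pattern's tokens are compared against every window of the same length in the document's tokens, and windows whose similarity ratio reaches ms are kept. A self-contained sketch of that core step, using difflib.SequenceMatcher as a stand-in for util.lratio and whitespace splitting as a stand-in for util.tokens (both stand-ins are assumptions, not this project's helpers):

import difflib

def fuzzy_windows(doc_tokens, pattern_tokens, ms):
    # Score every pattern-sized window of document tokens against the pattern
    pattern_text = ' '.join(pattern_tokens)
    hits = []
    for o in range(len(doc_tokens) - len(pattern_tokens) + 1):
        window_text = ' '.join(doc_tokens[o:o + len(pattern_tokens)])
        r = difflib.SequenceMatcher(None, pattern_text, window_text).ratio()
        if r >= ms:
            hits.append((o, r))
    return hits

# e.g. fuzzy_windows('the quick brown fox jumps'.split(), 'quick brwon'.split(), 0.7)
# -> [(1, 0.909...)]  (only the 'quick brown' window clears the threshold)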
Example #5
def itokens(text):
    # Number each (begin, end, string) token produced by util.tokens
    return tuple(
        (n, b, e, s)
        for n, (b, e, s)
        in zip(itertools.count(), util.tokens(text))
    )
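From the unpacking above, util.tokens(text) is expected to yield (begin, end, string) triples, and itokens simply numbers them. A small illustration with a hypothetical whitespace tokenizer standing in for util.tokens:

import itertools
import re

def toy_tokens(text):
    # Hypothetical stand-in for util.tokens: one (begin, end, string) triple per word
    return [(m.start(), m.end(), m.group()) for m in re.finditer(r'\S+', text)]

def itokens(text):
    return tuple(
        (n, b, e, s)
        for n, (b, e, s)
        in zip(itertools.count(), toy_tokens(text))
    )

# itokens("ab cd") -> ((0, 0, 2, 'ab'), (1, 3, 5, 'cd'))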