Example #1
def trends():
    full_db = load_db(database_path=DATABASE_PATH)

    db_init_timestamp = str_2_datetime(full_db['trends']['include_hashtags']['initial_timestamp'],
                                       input_format=time_format_full_with_timezone)

    db_update_timestamp = str_2_datetime(full_db['trends']['include_hashtags']['timestamp'],
                                         input_format=time_format_full_with_timezone)


    print("time since app start: {:.2f} minutes".format((time.time() - start_time) / 60))
    print("time since database init: {}".format(
        (datetime.datetime.now(tz=pytz.utc) - db_init_timestamp)))
    print("time since last update: {:.2f} minutes".format(
        (datetime.datetime.now(tz=pytz.utc) - db_update_timestamp).seconds / 60))
    print('\ndebug:')
    print('time now: {}'.format(datetime.datetime.now(tz=pytz.utc)))
    print('db init time: {}'.format(db_init_timestamp))
    print('diff: {}'.format(datetime.datetime.now(tz=pytz.utc) - db_init_timestamp))

    trends_output = {
        "results": full_db['trends']['include_hashtags'],
        "status": 'ok'
    }

    return jsonify(trends_output)
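These examples all lean on str_2_datetime, datetime_2_str, and the time_format_full_with_timezone format string, none of which are shown here. A minimal sketch of what they might look like, assuming the timestamps are plain strftime-formatted strings (the actual format patterns are an assumption):

import datetime


# assumed format strings; the real patterns are not shown in the examples
time_format_full_with_timezone = '%Y-%m-%d %H:%M:%S%z'
time_format_full_no_timezone = '%Y-%m-%d %H:%M:%S'


def str_2_datetime(time_str, input_format=time_format_full_with_timezone):
    """Parse a stored timestamp string back into a datetime."""
    return datetime.datetime.strptime(time_str, input_format)


def datetime_2_str(time_obj, output_format=time_format_full_with_timezone):
    """Serialize a datetime into the string form kept in the database."""
    return time_obj.strftime(output_format)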
Example #2
def get_top_hashtags_from_twitter(country='Japan', debug=False, cache_duration_mins=15, append_db=True):
    cache_db = load_db(database_path=db_path, debug=False)
    hashtags_cache = cache_db['hashtags']

    # compare db and now
    db_timestamp = str_2_datetime(hashtags_cache['timestamp'], input_format=time_format_full_with_timezone)
    db_timestamp = db_timestamp.astimezone(tz=pytz.utc)

    rq_timestamp = datetime.datetime.now(tz=pytz.utc)

    time_diff = rq_timestamp - db_timestamp
    print('time since last hashtags API call: {}'.format(time_diff))
    if time_diff.total_seconds() < cache_duration_mins * 60:
        # DB
        output_json = json.dumps(hashtags_cache['content'], ensure_ascii=False)
        return output_json
    else:
        output_json = get_top_hashtags_from_twitter_api(country=country, debug=debug)
        # update
        output_list = json.loads(output_json)

        if append_db:
            output_list = hashtags_cache['content'] + output_list

        cache_db['hashtags']['content'] = output_list
        cache_db['hashtags']['timestamp'] = datetime_2_str(rq_timestamp, output_format=time_format_full_with_timezone)

        update_db(cache_db, database_path=db_path, debug=debug)
        return output_json
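load_db and update_db are also undefined in these examples; since the cached content round-trips through json.dumps/json.loads, they are presumably thin wrappers around a JSON file on disk. A hedged sketch under that assumption (file layout and debug output are guesses):

import json


def load_db(database_path, debug=False):
    """Read the whole cache database from a JSON file."""
    with open(database_path, 'r', encoding='utf-8') as f:
        db = json.load(f)
    if debug:
        print('loaded db from {}'.format(database_path))
    return db


def update_db(db, database_path, debug=False):
    """Write the whole cache database back to its JSON file."""
    with open(database_path, 'w', encoding='utf-8') as f:
        json.dump(db, f, ensure_ascii=False)
    if debug:
        print('wrote db to {}'.format(database_path))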
def images():
    full_db = load_db(database_path=TRENDS_DATABASE_PATH)

    # from trends content
    # send back only a portion of the db
    contents = full_db['trends']
    del full_db

    output_content = []
    for c in contents:
        output_media_url = []
        try:
            for t in c['tweets']:
                if t['media']:
                    output_media_url.append({
                        "url": t['url'],
                        "images": t['media']
                    })
        except (KeyError, TypeError):
            # skip trend entries without a usable tweets list
            continue

        output_content.append({
            "label": c['label'],
            "time": c['time'],
            "media": output_media_url
        })

    output_results = {"content": output_content}

    trends_output = {"results": output_results, "status": 'ok'}

    return jsonify(trends_output)
def all():
    """
        the full database; use this to fetch an updated db before updating the container
        ---
        responses:
          200:
            description: returns database
            schema:
              id: databaseGet
              properties:
                results:
                  type: json
                  default: {trends: {include_hashtags: {}, exclude_hashtags: {}}, hashtags: {}}
                status:
                  type: number
                  default: 200
    """
    args = request.args.get('q')
    if not args:
        args = "main"

    if args == "main":
        full_db = load_db(database_path=DATABASE_PATH)

        db_init_timestamp = str_2_datetime(
            full_db['trends']['include_hashtags']['initial_timestamp'],
            input_format=time_format_full_with_timezone)
        db_update_timestamp = str_2_datetime(
            full_db['trends']['include_hashtags']['timestamp'],
            input_format=time_format_full_with_timezone)

        print("time since app start: {:.2f} minutes".format(
            (time.time() - start_time) / 60))
        print("time since database init: {:.2f} hours".format(
            (datetime.datetime.now(tz=pytz.utc) - db_init_timestamp).seconds /
            3600))
        print("time since last update: {:.2f} minutes".format(
            (datetime.datetime.now(tz=pytz.utc) - db_update_timestamp).seconds
            / 60))

    elif args == "trends":
        full_db = load_db(database_path=TRENDS_DATABASE_PATH)
    elif args == "top_posts":
        full_db = load_db(database_path=TOP_RETWEETS_DATABASE_PATH)

    return jsonify(full_db)
def check_db(db_path=top_retweets_db_path):
    db = load_db(db_path)

    c = db['top_posts']

    for x in c:
        print(repr(x['text']))
    print('number of posts: ', len(c))
    del db
    del c
Example #6
def daily():
    print("time since app start: {} minutes".format(str((time.time() - start_time) / 60)))
    print("time since last update: {} minutes".format(str((time.time() - update_start) / 60)))

    full_db = load_db(database_path=DATABASE_PATH)

    #print(full_db)

    return "hello {}".format('WAIT')
Example #7
def all():
    full_db = load_db(database_path=DATABASE_PATH)

    db_init_timestamp = str_2_datetime(full_db['trends']['include_hashtags']['initial_timestamp'], input_format=time_format_full_with_timezone)
    db_update_timestamp = str_2_datetime(full_db['trends']['include_hashtags']['timestamp'], input_format=time_format_full_with_timezone)

    print("time since app start: {:.2f} minutes".format((time.time() - start_time) / 60))
    print("time since database init: {:.2f} hours".format((datetime.datetime.now(tz=pytz.utc) - db_init_timestamp).seconds/3600))
    print("time since last update: {:.2f} minutes".format((datetime.datetime.now(tz=pytz.utc) - db_update_timestamp).seconds/60))

    return jsonify(full_db)
Example #8
def get_top_trends_from_twitter(country='Japan', exclude_hashtags=False, debug=False, cache_duration_mins=15, append_db=True):
    cache_db = load_db(database_path=db_path, debug=False)
    trends_db = cache_db['trends']
    if exclude_hashtags:
        trends_cache = trends_db['exclude_hashtags']
    else:
        trends_cache = trends_db['include_hashtags']

    # compare db and now
    try:
        db_timestamp = str_2_datetime(trends_cache['timestamp'], input_format=time_format_full_with_timezone)
    except ValueError:
        db_timestamp = str_2_datetime(trends_cache['timestamp'], input_format=time_format_full_no_timezone)
        db_timestamp = db_timestamp.astimezone(tz=pytz.utc)

    rq_timestamp = datetime.datetime.now(tz=pytz.utc)

    time_diff = rq_timestamp - db_timestamp
    print('time since last trends API call: {} (h:m:s)'.format(time_diff))
    print('time diff in seconds: {}'.format(time_diff.total_seconds()))
    print('time in db: {}'.format(db_timestamp))
    print('time in rq: {}'.format(rq_timestamp))
    if time_diff.total_seconds() < cache_duration_mins*60:
        print('less than cache duration, returning cache')
        output_json = json.dumps(trends_cache['content'], ensure_ascii=False)
        return output_json
    else:
        output_json = get_top_trends_from_twitter_api(country=country, exclude_hashtags=exclude_hashtags)
        # update
        output_list = json.loads(output_json)

        if append_db:
            output_list = trends_cache['content'] + output_list
            
        if exclude_hashtags:
            cache_db['trends']['exclude_hashtags']['content'] = output_list
            cache_db['trends']['exclude_hashtags']['timestamp'] = datetime_2_str(rq_timestamp, output_format=time_format_full_with_timezone)
        else:
            cache_db['trends']['include_hashtags']['content'] = output_list
            cache_db['trends']['include_hashtags']['timestamp'] = datetime_2_str(rq_timestamp, output_format=time_format_full_with_timezone)

        update_db(cache_db, database_path=db_path, debug=debug)
        return output_json
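The cache checks compare time_diff.total_seconds() against the cache duration. Note that timedelta.seconds would be the wrong attribute here: it holds only the seconds component of the difference (0-86399) and wraps after a day, so a day-old cache would look fresh again. A quick illustration:

import datetime

stale = datetime.timedelta(days=1, minutes=1)

print(stale.seconds)          # 60 -- only the seconds component
print(stale.total_seconds())  # 86460.0 -- the full duration

cache_duration_mins = 15
print(stale.seconds < cache_duration_mins * 60)          # True: wrongly "fresh"
print(stale.total_seconds() < cache_duration_mins * 60)  # False: correctly stale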
def top_posts():
    try:
        arg = int(request.args.get('count'))
        if not arg:
            arg = 100
    except (TypeError, ValueError):
        # missing or non-numeric ?count= falls back to the default
        arg = 100

    full_db = load_db(database_path=TOP_RETWEETS_DATABASE_PATH)

    # from trends content
    # send back only a portion of the db
    contents = full_db['top_posts']
    del full_db

    print('returning {} most recent items from db'.format(arg))

    output = {"results": contents[arg * -1:], "status": "ok"}

    return jsonify(output)
def hashtags_twitter_only():
    """
        get the list of hashtag labels currently in the trends cache
        ---
        parameters:
          - name: location
            in: query
            type: string
            required: true
            default: osaka
        responses:
          200:
            description: returns hashtag labels as <br />-separated text
            schema:
              id: predictionGet
              properties:
                results:
                  type: json
                  default: ""
                status:
                  type: number
                  default: 200
    """
    print("time since app start: {:.2f} minutes".format(
        str((time.time() - start_time) / 60)))
    print("time since last update: {:.2f} minutes".format(
        str((time.time() - update_start) / 60)))

    full_db = load_db(database_path=DATABASE_PATH)

    direct_hashtags_from_trends = full_db['trends']['include_hashtags'][
        'content']

    output = []
    for t in direct_hashtags_from_trends:
        output.append(t['label'])

    output_str = '<br />'.join(output)
    return textify(output_str)
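textify is not defined in any of these examples; since the handler joins its output with <br /> tags, it presumably wraps a string in an HTTP response the way jsonify wraps a dict. A guess at its shape:

from flask import make_response


def textify(text):
    """Hypothetical helper: wrap a plain string in an HTML response."""
    response = make_response(text)
    response.headers['Content-Type'] = 'text/html; charset=utf-8'
    return response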
def get_update_top_posts_from_twitter(min_retweets=10000, cache_duration_mins=15, debug=False, append_db=True):
    """
        also updates daily trends db, but doesn't return it
        :param country:
        :param exclude_hashtags:
        :param debug:
        :param cache_duration_mins:
        :param append_db:
        :return:
        """
    # load retweets db
    top_retweets_db = load_db(database_path=top_retweets_db_path, debug=False)
    top_posts_cache = top_retweets_db['top_posts']

    output_list = analyze_top_retweets(min_retweets=min_retweets, debug=debug)

    if append_db:
        # check for same post
        for o in output_list:
            # slice assignment filters the cached list in place; the db
            # entry top_retweets_db['top_posts'] is the same list object,
            # so it sees the change too. drops older duplicates of the
            # incoming tweets by url.
            top_posts_cache[:] = [d for d in top_posts_cache if d.get('url') != o['url']]

        # adds new tweets at the end
        output_list = top_posts_cache + output_list

    top_retweets_db['top_posts'] = output_list

    update_db(top_retweets_db, database_path=top_retweets_db_path, debug=debug)

    print('top posts db updated.')

    del top_retweets_db
    del top_posts_cache
    del output_list

    print('memory freed.')
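The slice assignment above is worth a closer look: top_posts_cache[:] = [...] replaces the list's contents in place, so every name bound to that list object, including top_retweets_db['top_posts'], sees the filtered result. A small standalone illustration:

db = {'top_posts': [{'url': 'a'}, {'url': 'b'}, {'url': 'c'}]}
cache = db['top_posts']

# filter in place: the db entry is the same list object, so it changes too
cache[:] = [d for d in cache if d.get('url') != 'b']

print(db['top_posts'])           # [{'url': 'a'}, {'url': 'c'}]
print(cache is db['top_posts'])  # True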
def trends():
    """
        loads trends database
        ---
        responses:
          200:
            description: returns database
            schema:
              id: databaseGet
              properties:
                results:
                  type: json
                  default: {content: {}, timestamp: "", initial_timestamp: ""}
                status:
                  type: string
                  default: ok
    """
    full_db = load_db(database_path=DATABASE_PATH)

    db_init_timestamp = str_2_datetime(
        full_db['trends']['include_hashtags']['initial_timestamp'],
        input_format=time_format_full_with_timezone)

    db_update_timestamp = str_2_datetime(
        full_db['trends']['include_hashtags']['timestamp'],
        input_format=time_format_full_with_timezone)

    print("time since app start: {:.2f} minutes".format(
        (time.time() - start_time) / 60))
    print("time since database init: {}".format(
        (datetime.datetime.now(tz=pytz.utc) - db_init_timestamp)))
    print("time since last update: {:.2f} minutes".format(
        (datetime.datetime.now(tz=pytz.utc) - db_update_timestamp).seconds /
        60))
    print('\ndebug:')
    print('time now: {}'.format(datetime.datetime.now(tz=pytz.utc)))
    print('db init time: {}'.format(db_init_timestamp))
    print('diff: {}'.format(
        datetime.datetime.now(tz=pytz.utc) - db_init_timestamp))

    # send back only a portion of the db
    results = full_db['trends']['include_hashtags']
    del full_db

    contents = results['content']

    output_content = []
    for c in contents:
        output_content.append({
            "label": c['label'],
            "time": c['time'],
            "volume": c['volume']
        })

    output_results = {
        "content": output_content,
        "timestamp": results['timestamp'],
        "initial_timestamp": results['initial_timestamp']
    }

    trends_output = {"results": output_results, "status": 'ok'}

    return jsonify(trends_output)
def backup():
    backup_db = load_db(database_path=DATABASE_PATH + '.bak')
    return jsonify(backup_db)
def get_top_trends_from_twitter(country='Japan', exclude_hashtags=False, debug=False, cache_duration_mins=15, append_db=True):
    """
    also updates the daily trends db, but doesn't return it.
    for trends, the stored timestamp is the time the API was called.

    :param country:
    :param exclude_hashtags:
    :param debug:
    :param cache_duration_mins:
    :param append_db:
    :return:
    """
    # load main db
    cache_db = load_db(database_path=db_path, debug=False)
    trends_db = cache_db['trends']

    if exclude_hashtags:
        trends_cache = trends_db['exclude_hashtags']
    else:
        trends_cache = trends_db['include_hashtags']

    # load trends + top retweets db
    trend_search_db = load_db(database_path=trends_db_path, debug=False)

    # MAIN_DB ONLY
    try:
        db_timestamp = str_2_datetime(trends_cache['timestamp'], input_format=time_format_full_with_timezone)
    except ValueError:
        db_timestamp = str_2_datetime(trends_cache['timestamp'], input_format=time_format_full_no_timezone)
        db_timestamp = db_timestamp.astimezone(tz=pytz.utc)

    rq_timestamp = datetime.datetime.now(tz=pytz.utc)

    time_diff = rq_timestamp - db_timestamp
    print('time since last trends API call: {} (h:m:s)'.format(time_diff))
    print('time diff in seconds: {}'.format(time_diff.total_seconds()))
    print('time in db: {}'.format(db_timestamp))
    print('time in rq: {}'.format(rq_timestamp))

    if time_diff.total_seconds() < cache_duration_mins*60:
        print('less than cache duration, returning cache')
        output_json = json.dumps(trends_cache['content'], ensure_ascii=False)
        return output_json
    else:
        output_json, img_output_json = get_top_trends_from_twitter_api(country=country, exclude_hashtags=exclude_hashtags)
        # update
        output_list = json.loads(output_json)
        trend_search_list = json.loads(img_output_json)

        if append_db:
            output_list = trends_cache['content'] + output_list
            trend_search_list = trend_search_db['trends'] + trend_search_list
            
        if exclude_hashtags:
            cache_db['trends']['exclude_hashtags']['content'] = output_list
            cache_db['trends']['exclude_hashtags']['timestamp'] = datetime_2_str(rq_timestamp, output_format=time_format_full_with_timezone)
        else:
            cache_db['trends']['include_hashtags']['content'] = output_list
            cache_db['trends']['include_hashtags']['timestamp'] = datetime_2_str(rq_timestamp, output_format=time_format_full_with_timezone)

        trend_search_db['trends'] = trend_search_list

        update_db(cache_db, database_path=db_path, debug=debug)
        update_db(trend_search_db, database_path=trends_db_path, debug=debug)

        print('trends and image database updated.')

        del cache_db
        del trends_db
        del trends_cache
        del trend_search_db
        del trend_search_list
        del output_list
        del output_json
        del img_output_json

        print('memory freed.')
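None of the examples show how these handlers are registered; they use Flask's jsonify and request, so the wiring presumably looks something like the sketch below (app setup, route paths, and the port are all assumptions):

# hypothetical Flask wiring; route names and paths are guesses
from flask import Flask, jsonify, request  # used inside the handlers above

app = Flask(__name__)

# the example handlers would be registered roughly like this:
# app.add_url_rule('/trends', 'trends', trends)
# app.add_url_rule('/top_posts', 'top_posts', top_posts)
# app.add_url_rule('/images', 'images', images)

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)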