def main(time_frame, max_request_per_time_frame, mongo_coll, search_params,
         max_id, termination_function):
    """Run a single query-based crawl and always close the crawler afterwards.

    A ``TwitterCrawler`` is created, connected, authenticated with the key
    file at ``../api_key.json``, configured with ``search_params`` and then
    asked to run ``search_by_query``.

    Args:
        time_frame: Forwarded to ``TwitterCrawler`` as ``time_frame``.
        max_request_per_time_frame: Forwarded to ``TwitterCrawler`` as
            ``max_requests``.
        mongo_coll: Forwarded to ``TwitterCrawler.connect``
            (presumably a MongoDB collection or its name -- TODO confirm).
        search_params: Forwarded to ``set_search_arguments`` as
            ``search_args``.
        max_id: Forwarded to ``search_by_query`` as ``current_max_id``.
        termination_function: Forwarded to ``search_by_query`` as
            ``term_func``.

    Raises:
        Whatever the crawler methods raise; the exception propagates after
        ``close()`` has been called.
    """
    tcs = TwitterCrawler(time_frame=time_frame,
                         max_requests=max_request_per_time_frame)
    tcs.connect(mongo_coll)
    # Once connected, close() must run even if authentication or the search
    # itself fails; otherwise the connection is left open on error.
    try:
        tcs.authenticate("../api_key.json")
        tcs.set_search_arguments(search_args=search_params)
        tcs.search_by_query(wait_for=3,
                            current_max_id=max_id,
                            term_func=termination_function)
    finally:
        tcs.close()
def collect_tweets_by_search_terms(search_configs_filepath, output_folder,
                                   config):
    """Cycle over the stored search configs and crawl each one in turn.

    The JSON file at ``search_configs_filepath`` is loaded once. Its entries
    are then visited round-robin via ``itertools.cycle``, so with at least
    one entry this function does not return on its own. For every entry an
    OR-query is built from its lower-cased ``terms``, a ``TwitterCrawler``
    runs ``search_by_query``, the entry is updated with the resulting
    ``since_id`` / ``querystring`` / ``geocode``, the whole mapping is handed
    to ``flash_cmd_config`` and the loop sleeps for ``WAIT_TIME`` seconds.

    Args:
        search_configs_filepath: Path of the JSON file with the search
            configs; each entry must provide ``terms`` and may provide
            ``since_id`` and ``geocode``.
        output_folder: Passed to ``TwitterCrawler`` and ``flash_cmd_config``.
        config: Mapping whose ``'apikeys'`` value is itself a mapping; the
            last of its values is used as the API key set.
    """
    # pop() on the list of values yields the last configured key set.
    apikeys = list(config['apikeys'].values()).pop()

    with open(os.path.abspath(search_configs_filepath), 'r') as config_file:
        search_configs = json.load(config_file)

    for config_id in itertools.cycle(search_configs):
        entry = search_configs[config_id]

        # Each lower-cased term becomes one parenthesised OR clause.
        or_clauses = ['(' + term.lower() + ')' for term in entry['terms']]
        querystring = ' OR '.join(or_clauses)

        since_id = entry.get('since_id', 0)
        raw_geocode = entry.get('geocode')
        geocode = tuple(raw_geocode) if raw_geocode else None

        request_details = (util.md5(querystring.encode('utf-8')), since_id,
                           geocode)
        logger.info(
            'REQUEST -> (md5(querystring): [%s]; since_id: [%d]; geocode: [%s])'
            % request_details)

        try:
            crawler = TwitterCrawler(apikeys=apikeys,
                                     client_args=CLIENT_ARGS,
                                     output_folder=output_folder)
            since_id = crawler.search_by_query(querystring,
                                               geocode=geocode,
                                               since_id=since_id)
        except Exception as exc:
            # Best effort: log the failure and carry on with the since_id
            # this iteration started with.
            logger.error(exc)
            logger.error(util.full_stack())

        entry.update({
            'since_id': since_id,
            'querystring': querystring,
            'geocode': geocode,
        })
        search_configs[config_id] = entry

        flash_cmd_config(search_configs, search_configs_filepath,
                         output_folder)

        completed_details = (util.md5(querystring.encode('utf-8')), since_id,
                             geocode)
        logger.info(
            'COMPLETED -> (md5(querystring): [%s]; since_id: [%d]; geocode: [%s])'
            % completed_details)
        logger.info('PAUSE %ds to CONTINUE...' % WAIT_TIME)
        time.sleep(WAIT_TIME)
# Example #3
# 0
def collect_tweets_by_search_terms(search_configs_filepath, output_folder, config):
    """Repeatedly crawl Twitter for every search config stored in a JSON file.

    The configs are read once from ``search_configs_filepath`` and then
    iterated with ``itertools.cycle``, i.e. round-robin without a stop
    condition while at least one config exists. Per config the function
    builds an OR-query from the lower-cased ``terms``, runs
    ``TwitterCrawler.search_by_query``, writes ``since_id``, ``querystring``
    and ``geocode`` back into the config, passes all configs to
    ``flash_cmd_config`` and pauses for ``WAIT_TIME`` seconds.

    Args:
        search_configs_filepath: Location of the JSON search-config file.
        output_folder: Forwarded to ``TwitterCrawler`` and
            ``flash_cmd_config``.
        config: Mapping with an ``'apikeys'`` mapping; its last value is the
            key set handed to the crawler.
    """
    all_apikeys = list(config['apikeys'].values())
    apikeys = all_apikeys.pop()  # last configured key set

    configs_path = os.path.abspath(search_configs_filepath)
    with open(configs_path, 'r') as handle:
        search_configs = json.load(handle)

    for key in itertools.cycle(search_configs):
        current = search_configs[key]

        lowered_terms = [term.lower() for term in current['terms']]
        querystring = ' OR '.join(['(' + term + ')' for term in lowered_terms])

        since_id = current.get('since_id', 0)

        # A missing or falsy geocode means "no geographic restriction".
        geocode = None
        if current.get('geocode'):
            geocode = tuple(current['geocode'])

        logger.info(
            'REQUEST -> (md5(querystring): [%s]; since_id: [%d]; geocode: [%s])'
            % (util.md5(querystring.encode('utf-8')), since_id, geocode))

        try:
            crawler = TwitterCrawler(apikeys=apikeys,
                                     client_args=CLIENT_ARGS,
                                     output_folder=output_folder)
            since_id = crawler.search_by_query(querystring,
                                               geocode=geocode,
                                               since_id=since_id)
        except Exception as exc:
            # Failures are logged only; the loop proceeds with the since_id
            # that was in effect before the crawl attempt.
            logger.error(exc)
            logger.error(util.full_stack())

        current['since_id'] = since_id
        current['querystring'] = querystring
        current['geocode'] = geocode
        search_configs[key] = current

        flash_cmd_config(search_configs, search_configs_filepath, output_folder)

        logger.info(
            'COMPLETED -> (md5(querystring): [%s]; since_id: [%d]; geocode: [%s])'
            % (util.md5(querystring.encode('utf-8')), since_id, geocode))
        logger.info('PAUSE %ds to CONTINUE...' % WAIT_TIME)
        time.sleep(WAIT_TIME)