Python Twarc Beispiele

Programmiersprache: Python

Namespace / Paketname: twarc.client

Klasse / Typ: Twarc

Beispiele auf hotexamples.com: 3

Python Twarc - 3 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Klasse twarc.client.Twarc, die aus Open-Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

Twarc(2)

dehydrate(2)

configure(1)

Häufig verwendete Methoden

Twarc (2)

dehydrate (2)

configure (1)

Beispiel #1

Datei anzeigen

Datei: command.py Projekt: gwu-libraries/twarc

def main():
    """Command-line entry point: run one twarc command and write its results.

    Parses the arguments produced by ``get_argparser()``, builds a ``Twarc``
    client from them, dispatches on ``args.command`` to obtain an iterable of
    results (``things``) and writes every result to ``args.output`` (or
    stdout when no output file is given) as JSON, CSV or plain IDs.

    Rate-limit notices and warnings found among the results are logged; they
    are only written to the output when ``args.warnings`` is set.

    The function calls ``sys.exit`` for the ``version``, ``help`` and
    ``configure`` commands and for unknown commands, and reports invalid
    input through ``parser.error``.
    """
    parser = get_argparser()
    args = parser.parse_args()

    command = args.command
    query = args.query or ""

    logging.basicConfig(
        filename=args.log,
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(message)s"
    )

    # catch ctrl-c so users don't see a stack trace
    signal.signal(signal.SIGINT, lambda signum, frame: sys.exit(0))

    if command == "version":
        print("twarc v%s" % __version__)
        sys.exit()
    elif command == "help" or not command:
        parser.print_help()
        print("\nPlease use one of the following commands:\n")
        for cmd in commands:
            print(" - %s" % cmd)
        print("\nFor example:\n\n    twarc search blacklivesmatter")
        sys.exit(1)

    # Don't validate the keys if the command is "configure"
    validate_keys = not (command == "configure" or args.skip_key_validation)

    t = Twarc(
        consumer_key=args.consumer_key,
        consumer_secret=args.consumer_secret,
        access_token=args.access_token,
        access_token_secret=args.access_token_secret,
        connection_errors=args.connection_errors,
        http_errors=args.http_errors,
        config=args.config,
        profile=args.profile,
        tweet_mode=args.tweet_mode,
        protected=args.protected,
        validate_keys=validate_keys,
    )

    # calls that return tweets
    if command == "search":
        things = t.search(
            query,
            since_id=args.since_id,
            max_id=args.max_id,
            lang=args.lang,
            result_type=args.result_type,
            geocode=args.geocode
        )

    elif command == "filter":
        things = t.filter(
            track=query,
            follow=args.follow,
            locations=args.locations
        )

    elif command == "dehydrate":
        input_iterator = fileinput.FileInput(
            query,
            mode='r',
            openhook=fileinput.hook_compressed,
        )
        things = t.dehydrate(input_iterator)

    elif command == "hydrate":
        input_iterator = fileinput.FileInput(
            query,
            mode='r',
            openhook=fileinput.hook_compressed,
        )
        things = t.hydrate(input_iterator)

    elif command == "tweet":
        things = [t.tweet(query)]

    elif command == "sample":
        things = t.sample()

    elif command == "timeline":
        kwargs = {"max_id": args.max_id, "since_id": args.since_id}
        # an all-digit query is treated as a user id, anything else as a
        # screen name
        if re.match(r'^[0-9]+$', query):
            kwargs["user_id"] = query
        else:
            kwargs["screen_name"] = query
        things = t.timeline(**kwargs)

    elif command == "retweets":
        things = t.retweets(query)

    elif command == "users":
        if os.path.isfile(query):
            iterator = fileinput.FileInput(
                query,
                mode='r',
                openhook=fileinput.hook_compressed,
            )
            # Peek at the first line to decide whether the file holds user
            # ids or screen names. The file is opened separately (and closed
            # again) so the FileInput iterator above is not advanced; an
            # empty file yields "" instead of raising StopIteration.
            with open(query) as id_file:
                first_line = next(id_file, "")
            if re.match(r'^[0-9,]+$', first_line):
                id_type = 'user_id'
            else:
                id_type = 'screen_name'
            things = t.user_lookup(ids=iterator, id_type=id_type)
        elif re.match(r'^[0-9,]+$', query):
            things = t.user_lookup(ids=query.split(","))
        else:
            things = t.user_lookup(ids=query.split(","), id_type='screen_name')

    elif command == "followers":
        things = t.follower_ids(query)

    elif command == "friends":
        things = t.friend_ids(query)

    elif command == "trends":
        # lookup woeid for geo-coordinate if appropriate
        geo = re.match(r'^([0-9\-\.]+),([0-9\-\.]+)$', query)
        if geo:
            lat, lon = map(float, geo.groups())
            if lat > 180 or lat < -180 or lon > 180 or lon < -180:
                parser.error('LAT and LONG must be within [-180.0, 180.0]')
            places = list(t.trends_closest(lat, lon))
            if len(places) == 0:
                parser.error("Couldn't find WOE ID for %s" % query)
            query = places[0]["woeid"]

        if not query:
            things = t.trends_available()
        else:
            trends = t.trends_place(query)
            # fall back to an empty result so the output loop below does not
            # hit an unbound ``things`` when no trends come back
            things = trends[0]['trends'] if trends else []

    elif command == "replies":
        tweet = t.tweet(query)
        if not tweet:
            parser.error("tweet with id %s does not exist" % query)
        things = t.replies(tweet, args.recursive)

    elif command == "listmembers":
        list_parts = re.match('^https://twitter.com/(.+)/lists/(.+)$', query)
        if not list_parts:
            parser.error("provide the url for the list, e.g., https://twitter.com/USAFacts/lists/us-armed-forces")
        # group(1) is the owner's screen name, group(2) the list slug;
        # groups(1) would return a tuple of *all* groups instead
        things = t.list_members(slug=list_parts.group(2),
                                owner_screen_name=list_parts.group(1))

    elif command == "configure":
        t.configure()
        sys.exit()

    else:
        parser.print_help()
        print("\nPlease use one of the following commands:\n")
        for cmd in commands:
            print(" - %s" % cmd)
        print("\nFor example:\n\n    twarc search blacklivesmatter")
        sys.exit(1)

    # get the output filehandle
    if args.output:
        if pyv == 3:
            fh = codecs.open(args.output, 'wb', 'utf8')
        else:
            fh = open(args.output, 'w')
    else:
        fh = sys.stdout

    # optionally create a csv writer
    csv_writer = None
    csv_commands = ["filter", "hydrate", "replies", "retweets", "sample",
                    "search", "timeline", "tweet"]
    if args.format in ("csv", "csv-excel") and command not in csv_commands:
        parser.error("csv output not available for %s" % command)
    elif args.format in ("csv", "csv-excel"):
        csv_writer = csv.writer(fh)
        csv_writer.writerow(get_headings())

    line_count = 0
    file_count = 0
    try:
        for thing in things:

            # rotate the files if necessary
            if args.output and args.split and line_count % args.split == 0:
                file_count += 1
                # close the file being rotated out so handles don't pile up
                fh.close()
                fh = codecs.open(numbered_filepath(args.output, file_count), 'wb', 'utf8')
                if csv_writer:
                    csv_writer = csv.writer(fh)
                    csv_writer.writerow(get_headings())

            line_count += 1

            # ready to output

            if isinstance(thing, str_type):
                # user or tweet IDs
                print(thing, file=fh)
                logging.info("archived %s" % thing)
            elif 'id_str' in thing:
                # tweets and users
                if (args.format == "json"):
                    print(json.dumps(thing), file=fh)
                elif (args.format == "csv"):
                    csv_writer.writerow(get_row(thing))
                elif (args.format == "csv-excel"):
                    csv_writer.writerow(get_row(thing, excel=True))
                logging.info("archived %s", thing['id_str'])
            elif 'woeid' in thing:
                # places
                print(json.dumps(thing), file=fh)
            elif 'tweet_volume' in thing:
                # trends
                print(json.dumps(thing), file=fh)
            elif 'limit' in thing:
                # rate limits; use a dedicated name so the Twarc client ``t``
                # is not overwritten by the timestamp
                limit_time = datetime.datetime.utcfromtimestamp(
                    float(thing['limit']['timestamp_ms']) / 1000)
                limit_time = limit_time.isoformat("T") + "Z"
                logging.warning("%s tweets undelivered at %s",
                                thing['limit']['track'], limit_time)
                if args.warnings:
                    print(json.dumps(thing), file=fh)
            elif 'warning' in thing:
                # other warnings
                logging.warning(thing['warning']['message'])
                if args.warnings:
                    print(json.dumps(thing), file=fh)
    finally:
        # flush and release the output file, also when the loop is left via
        # sys.exit() from the SIGINT handler; never close stdout
        if fh is not sys.stdout:
            fh.close()

Beispiel #2

Datei anzeigen

Datei: command.py Projekt: rongpenl/twarc

def main():
    """Command-line entry point: run one twarc command and write its results.

    Parses the arguments produced by ``get_argparser()``, builds a ``Twarc``
    client from them, dispatches on ``args.command`` to obtain an iterable of
    results (``things``) and writes every result to ``args.output`` (or
    stdout when no output file is given) as JSON, CSV or plain IDs.

    The ``search`` command uses the standard search unless one of
    ``args.thirtyday``, ``args.fullarchive`` or ``args.gnip_fullarchive`` is
    set, in which case ``Twarc.premium_search`` is called instead.

    Rate-limit notices and warnings found among the results are logged; they
    are only written to the output when ``args.warnings`` is set.

    The function calls ``sys.exit`` for the ``version``, ``help`` and
    ``configure`` commands and for unknown commands, and reports invalid
    input through ``parser.error``.
    """
    parser = get_argparser()
    args = parser.parse_args()

    command = args.command
    query = args.query or ""

    logging.basicConfig(filename=args.log,
                        level=logging.INFO,
                        format="%(asctime)s %(levelname)s %(message)s")

    # log and stop when process receives SIGINT
    def stop(signum, frame):
        log.warning('process received SIGINT, stopping')
        sys.exit(0)

    signal.signal(signal.SIGINT, stop)

    if command == "version":
        print("twarc v%s" % __version__)
        sys.exit()
    elif command == "help" or not command:
        parser.print_help()
        print("\nPlease use one of the following commands:\n")
        for cmd in commands:
            print(" - %s" % cmd)
        print("\nFor example:\n\n    twarc search blacklivesmatter")
        sys.exit(1)

    # Don't validate the keys if the command is "configure"
    validate_keys = not (command == "configure" or args.skip_key_validation)

    t = Twarc(consumer_key=args.consumer_key,
              consumer_secret=args.consumer_secret,
              access_token=args.access_token,
              access_token_secret=args.access_token_secret,
              connection_errors=args.connection_errors,
              http_errors=args.http_errors,
              config=args.config,
              profile=args.profile,
              tweet_mode=args.tweet_mode,
              protected=args.protected,
              validate_keys=validate_keys,
              app_auth=args.app_auth,
              gnip_auth=args.gnip_auth)

    # calls that return tweets
    if command == "search":
        # search takes a single language: use the first one given, if any
        # (also tolerates ``args.lang`` being None)
        lang = args.lang[0] if args.lang else None

        # if not using a premium endpoint do a standard search
        if not args.thirtyday and not args.fullarchive and not args.gnip_fullarchive:
            things = t.search(query,
                              since_id=args.since_id,
                              max_id=args.max_id,
                              lang=lang,
                              result_type=args.result_type,
                              geocode=args.geocode)
        else:
            # parse the dates if given
            from_date = parse_dt(args.from_date) if args.from_date else None
            to_date = parse_dt(args.to_date) if args.to_date else None
            if args.gnip_fullarchive:
                env = args.gnip_fullarchive
                product = 'gnip_fullarchive'
            elif args.thirtyday:
                env = args.thirtyday
                product = '30day'
            else:
                env = args.fullarchive
                product = 'fullarchive'
            things = t.premium_search(
                query,
                product,
                env,
                from_date=from_date,
                to_date=to_date,
                sandbox=args.sandbox,
                limit=args.limit,
            )

    elif command == "filter":
        things = t.filter(track=query,
                          follow=args.follow,
                          locations=args.locations,
                          lang=args.lang)

    elif command == "dehydrate":
        input_iterator = fileinput.FileInput(
            query,
            mode='r',
            openhook=fileinput.hook_compressed,
        )
        things = t.dehydrate(input_iterator)

    elif command == "hydrate":
        input_iterator = fileinput.FileInput(
            query,
            mode='r',
            openhook=fileinput.hook_compressed,
        )
        things = t.hydrate(input_iterator)

    elif command == "tweet":
        things = [t.tweet(query)]

    elif command == "sample":
        things = t.sample()

    elif command == "timeline":
        kwargs = {"max_id": args.max_id, "since_id": args.since_id}
        # an all-digit query is treated as a user id, any other non-empty
        # query as a screen name; an empty query adds neither
        if re.match(r'^[0-9]+$', query):
            kwargs["user_id"] = query
        elif query:
            kwargs["screen_name"] = query
        things = t.timeline(**kwargs)

    elif command == "retweets":
        if os.path.isfile(query):
            iterator = fileinput.FileInput(
                query,
                mode='r',
                openhook=fileinput.hook_compressed,
            )
            things = t.retweets(tweet_ids=iterator)
        else:
            things = t.retweets(tweet_ids=query.split(','))

    elif command == "users":
        if os.path.isfile(query):
            iterator = fileinput.FileInput(
                query,
                mode='r',
                openhook=fileinput.hook_compressed,
            )
            # Peek at the first line to decide whether the file holds user
            # ids or screen names. The file is opened separately (and closed
            # again) so the FileInput iterator above is not advanced; an
            # empty file yields "" instead of raising StopIteration.
            with open(query) as id_file:
                first_line = next(id_file, "")
            if re.match(r'^[0-9,]+$', first_line):
                id_type = 'user_id'
            else:
                id_type = 'screen_name'
            things = t.user_lookup(ids=iterator, id_type=id_type)
        elif re.match(r'^[0-9,]+$', query):
            things = t.user_lookup(ids=query.split(","))
        else:
            things = t.user_lookup(ids=query.split(","), id_type='screen_name')

    elif command == "followers":
        things = t.follower_ids(query)

    elif command == "friends":
        things = t.friend_ids(query)

    elif command == "trends":
        # lookup woeid for geo-coordinate if appropriate
        geo = re.match(r'^([0-9-.]+),([0-9-.]+)$', query)
        if geo:
            lat, lon = map(float, geo.groups())
            if lat > 180 or lat < -180 or lon > 180 or lon < -180:
                parser.error('LAT and LONG must be within [-180.0, 180.0]')
            places = list(t.trends_closest(lat, lon))
            if len(places) == 0:
                parser.error("Couldn't find WOE ID for %s" % query)
            query = places[0]["woeid"]

        if not query:
            things = t.trends_available()
        else:
            trends = t.trends_place(query)
            # fall back to an empty result so the output loop below does not
            # hit an unbound ``things`` when no trends come back
            things = trends[0]['trends'] if trends else []

    elif command == "replies":
        tweet = t.tweet(query)
        if not tweet:
            parser.error("tweet with id %s does not exist" % query)
        things = t.replies(tweet, args.recursive)

    elif command == "listmembers":
        list_parts = re.match('^https://twitter.com/(.+)/lists/(.+)$', query)
        if not list_parts:
            parser.error(
                "provide the url for the list, e.g., https://twitter.com/USAFacts/lists/us-armed-forces"
            )
        # group(1) is the owner's screen name, group(2) the list slug;
        # groups(1) would return a tuple of *all* groups instead
        things = t.list_members(slug=list_parts.group(2),
                                owner_screen_name=list_parts.group(1))

    elif command == "configure":
        t.configure()
        sys.exit()

    else:
        parser.print_help()
        print("\nPlease use one of the following commands:\n")
        for cmd in commands:
            print(" - %s" % cmd)
        print("\nFor example:\n\n    twarc search blacklivesmatter")
        sys.exit(1)

    # get the output filehandle
    if args.output:
        if pyv == 3:
            fh = codecs.open(args.output, 'wb', 'utf8')
        else:
            fh = open(args.output, 'w')
    else:
        fh = sys.stdout

    # optionally create a csv writer
    csv_writer = None
    if args.format in ("csv", "csv-excel") and command not in [
            "filter", "hydrate", "replies", "retweets", "sample", "search",
            "timeline", "tweet"
    ]:
        parser.error("csv output not available for %s" % command)
    elif args.format in ("csv", "csv-excel"):
        csv_writer = csv.writer(fh)
        csv_writer.writerow(get_headings())

    line_count = 0
    file_count = 0
    try:
        for thing in things:

            # rotate the files if necessary
            if args.output and args.split and line_count % args.split == 0:
                file_count += 1
                # close the file being rotated out so handles don't pile up
                fh.close()
                fh = codecs.open(numbered_filepath(args.output, file_count),
                                 'wb', 'utf8')
                if csv_writer:
                    csv_writer = csv.writer(fh)
                    csv_writer.writerow(get_headings())

            line_count += 1

            # ready to output

            if isinstance(thing, str_type):
                # user or tweet IDs
                print(thing, file=fh)
                log.info("archived %s" % thing)
            elif 'id_str' in thing:
                # tweets and users
                if (args.format == "json"):
                    print(json.dumps(thing), file=fh)
                elif (args.format == "csv"):
                    csv_writer.writerow(get_row(thing))
                elif (args.format == "csv-excel"):
                    csv_writer.writerow(get_row(thing, excel=True))
                log.info("archived %s", thing['id_str'])
            elif 'woeid' in thing:
                # places
                print(json.dumps(thing), file=fh)
            elif 'tweet_volume' in thing:
                # trends
                print(json.dumps(thing), file=fh)
            elif 'limit' in thing:
                # rate limits; use a dedicated name so the Twarc client ``t``
                # is not overwritten by the timestamp
                limit_time = datetime.datetime.utcfromtimestamp(
                    float(thing['limit']['timestamp_ms']) / 1000)
                limit_time = limit_time.isoformat("T") + "Z"
                log.warning("%s tweets undelivered at %s",
                            thing['limit']['track'], limit_time)
                if args.warnings:
                    print(json.dumps(thing), file=fh)
            elif 'warning' in thing:
                # other warnings
                log.warning(thing['warning']['message'])
                if args.warnings:
                    print(json.dumps(thing), file=fh)
            elif 'data' in thing:
                # Labs style JSON schema.
                print(json.dumps(thing), file=fh)
    finally:
        # flush and release the output file, also when the loop is left via
        # sys.exit() from the SIGINT handler; never close stdout
        if fh is not sys.stdout:
            fh.close()

Beispiel #3

Datei anzeigen

def main():
    """Command-line entry point: run one twarc command and write its results.

    Parses the arguments produced by ``get_argparser()``, builds a ``Twarc``
    client from them, dispatches on ``args.command`` to obtain an iterable of
    results (``things``) and writes every result to ``args.output`` (or
    stdout when no output file is given) as JSON, CSV or plain IDs.

    Rate-limit notices and warnings found among the results are logged; they
    are only written to the output when ``args.warnings`` is set.

    The function calls ``sys.exit`` for the ``version``, ``help`` and
    ``configure`` commands and for unknown commands, and reports invalid
    input through ``parser.error``.
    """
    parser = get_argparser()
    args = parser.parse_args()

    command = args.command
    query = args.query or ""

    logging.basicConfig(filename=args.log,
                        level=logging.INFO,
                        format="%(asctime)s %(levelname)s %(message)s")

    if command == "version":
        print("twarc v%s" % __version__)
        sys.exit()
    elif command == "help" or not command:
        parser.print_help()
        print("\nPlease use one of the following commands:\n")
        for cmd in commands:
            print(" - %s" % cmd)
        print("\nFor example:\n\n    twarc search blacklivesmatter")
        sys.exit(1)

    t = Twarc(consumer_key=args.consumer_key,
              consumer_secret=args.consumer_secret,
              access_token=args.access_token,
              access_token_secret=args.access_token_secret,
              connection_errors=args.connection_errors,
              http_errors=args.http_errors,
              config=args.config,
              profile=args.profile,
              tweet_mode=args.tweet_mode)

    # calls that return tweets
    if command == "search":
        things = t.search(query,
                          since_id=args.since_id,
                          max_id=args.max_id,
                          lang=args.lang,
                          result_type=args.result_type,
                          geocode=args.geocode)

    elif command == "filter":
        things = t.filter(track=query,
                          follow=args.follow,
                          locations=args.locations)

    elif command == "dehydrate":
        # plain 'r': the 'rU' mode is rejected by fileinput on Python 3.11+
        input_iterator = fileinput.FileInput(
            query,
            mode='r',
            openhook=fileinput.hook_compressed,
        )
        things = t.dehydrate(input_iterator)

    elif command == "hydrate":
        input_iterator = fileinput.FileInput(
            query,
            mode='r',
            openhook=fileinput.hook_compressed,
        )
        things = t.hydrate(input_iterator)

    elif command == "tweet":
        things = [t.tweet(query)]

    elif command == "sample":
        things = t.sample()

    elif command == "timeline":
        kwargs = {"max_id": args.max_id, "since_id": args.since_id}
        # an all-digit query is treated as a user id, anything else as a
        # screen name
        if re.match(r'^[0-9]+$', query):
            kwargs["user_id"] = query
        else:
            kwargs["screen_name"] = query
        things = t.timeline(**kwargs)

    elif command == "retweets":
        things = t.retweets(query)

    elif command == "users":
        if os.path.isfile(query):
            iterator = fileinput.FileInput(
                query,
                mode='r',
                openhook=fileinput.hook_compressed,
            )
            things = t.user_lookup(iterator=iterator)
        elif re.match(r'^[0-9,]+$', query):
            things = t.user_lookup(user_ids=query.split(","))
        else:
            things = t.user_lookup(screen_names=query.split(","))

    elif command == "followers":
        things = t.follower_ids(query)

    elif command == "friends":
        things = t.friend_ids(query)

    elif command == "trends":
        # lookup woeid for geo-coordinate if appropriate
        geo = re.match(r'^([0-9\-\.]+),([0-9\-\.]+)$', query)
        if geo:
            lat, lon = map(float, geo.groups())
            if lat > 180 or lat < -180 or lon > 180 or lon < -180:
                parser.error('LAT and LONG must be within [-180.0, 180.0]')
            places = list(t.trends_closest(lat, lon))
            if len(places) == 0:
                parser.error("Couldn't find WOE ID for %s" % query)
            query = places[0]["woeid"]

        if not query:
            things = t.trends_available()
        else:
            trends = t.trends_place(query)
            # fall back to an empty result so the output loop below does not
            # hit an unbound ``things`` when no trends come back
            things = trends[0]['trends'] if trends else []

    elif command == "replies":
        tweet = t.tweet(query)
        if not tweet:
            parser.error("tweet with id %s does not exist" % query)
        things = t.replies(tweet, args.recursive)

    elif command == "configure":
        t.input_keys()
        sys.exit()

    else:
        parser.print_help()
        print("\nPlease use one of the following commands:\n")
        for cmd in commands:
            print(" - %s" % cmd)
        print("\nFor example:\n\n    twarc search blacklivesmatter")
        sys.exit(1)

    # get the output filehandle
    if args.output:
        fh = codecs.open(args.output, 'wb', 'utf8')
    else:
        fh = sys.stdout

    # optionally create a csv writer
    csv_writer = None
    if args.format == "csv" and command not in [
            "filter", "hydrate", "replies", "retweets", "sample", "search",
            "timeline", "tweet"
    ]:
        parser.error("csv output not available for %s" % command)
    elif args.format == "csv":
        csv_writer = csv.writer(fh)
        csv_writer.writerow(get_headings())

    line_count = 0
    file_count = 0
    try:
        for thing in things:

            # rotate the files if necessary
            if args.output and args.split and line_count % args.split == 0:
                file_count += 1
                # close the file being rotated out so handles don't pile up
                fh.close()
                fh = codecs.open(numbered_filepath(args.output, file_count),
                                 'wb', 'utf8')
                if csv_writer:
                    csv_writer = csv.writer(fh)
                    csv_writer.writerow(get_headings())

            line_count += 1

            # ready to output

            if isinstance(thing, str_type):
                # user or tweet IDs
                print(thing, file=fh)
                logging.info("archived %s" % thing)
            elif 'id_str' in thing:
                # tweets and users
                if (args.format == "json"):
                    print(json.dumps(thing), file=fh)
                elif (args.format == "csv"):
                    csv_writer.writerow(get_row(thing))
                logging.info("archived %s", thing['id_str'])
            elif 'woeid' in thing:
                # places
                # json.dumps (not json.dump): serialise to a string for print
                print(json.dumps(thing), file=fh)
            elif 'tweet_volume' in thing:
                # trends
                print(json.dumps(thing), file=fh)
            elif 'limit' in thing:
                # rate limits; use a dedicated name so the Twarc client ``t``
                # is not overwritten by the timestamp
                limit_time = datetime.datetime.utcfromtimestamp(
                    float(thing['limit']['timestamp_ms']) / 1000)
                limit_time = limit_time.isoformat("T") + "Z"
                logging.warning("%s tweets undelivered at %s",
                                thing['limit']['track'], limit_time)
                if args.warnings:
                    print(json.dumps(thing), file=fh)
            elif 'warning' in thing:
                # other warnings
                logging.warning(thing['warning']['message'])
                if args.warnings:
                    print(json.dumps(thing), file=fh)
    finally:
        # flush and release the output file; never close stdout
        if fh is not sys.stdout:
            fh.close()