Esempio n. 1
0
def crawl_friends(session,
                  twitter_api,
                  crawl_user,
                  crawl_user_id=None,
                  when=None):
    """Record accounts that `crawl_user` follows and that are not stored yet.

    Iterates the IDs returned by `twitter_api.GetFriendIDs`. An ID whose external id already
    matches an `Account` row is skipped outright. Any other ID is fetched with `GetUser`, stored
    through `insert_user`, printed, and linked with a "follows" `AccountRelationship` whose left
    side is `crawl_user` and whose right side is the new account.

    `crawl_user_id` falls back to `crawl_user.id` when falsy; `when` falls back to `now()` when it
    is None. A `TwitterError` raised while handling one ID is printed and that ID is skipped.

    NOTE(review): already-known accounts get no relationship row here -- confirm that is intended.
    """
    target_id = crawl_user_id if crawl_user_id else crawl_user.id
    timestamp = now() if when is None else when

    for friend_id in twitter_api.GetFriendIDs(user_id=target_id):
        try:
            known = session.query(Account)\
                           .filter_by(external_id=twitter_external_user_id(friend_id))\
                           .first()
            if known:
                continue

            # Unknown account: hydrate this single user and store it.
            friend = insert_user(session, twitter_api.GetUser(user_id=friend_id))
            print(friend)
            get_or_create(session,
                          AccountRelationship,
                          left=crawl_user,
                          right=friend,
                          rel="follows",
                          when=timestamp)

        except twitter.error.TwitterError as err:
            print(friend_id, err)
Esempio n. 2
0
    def helper(session, external_id, persona=None, when=None):
        """Find or create the account for `external_id` and return it after a commit.

        The account is looked up by the service from `service_ctor(session)` and the id from
        `external_id_fn(external_id)`; a new `schema.Account` is added when none matches.

        :param session: database session used for queries, inserts and the final commit.
        :param external_id: raw identifier; also stored verbatim as a `schema.Name` of the account.
        :param persona: optional persona to attach. When the account already has one, the two are
            passed to `merge_left(session, persona, account.persona)`.
        :param when: timestamp stored on the account; defaults to `now()` when falsy.
        :returns: the refreshed `schema.Account`.
        """
        when = when or now()
        _svc = service_ctor(session)
        _extid = external_id_fn(external_id)

        account = session.query(schema.Account)\
                         .filter_by(service=_svc,
                                    external_id=_extid)\
                         .first()
        if not account:
            account = schema.Account(service=_svc, external_id=_extid)
            session.add(account)

        # `when` has already been defaulted above, so it is always recorded.
        account.when = when

        if account.persona and persona:
            # Imported at call time, as in the original (presumably to avoid an import cycle).
            from bbdb.personas import merge_left
            merge_left(session, persona, account.persona)

        elif not account.persona:
            # Only attach a persona when the account has none. An existing persona must not be
            # replaced by a fresh empty one just because the caller passed no persona.
            account.persona = persona or schema.Persona()

        schema.get_or_create(session,
                             schema.Name,
                             name=external_id,
                             account=account)

        session.commit()
        session.refresh(account)
        return account
Esempio n. 3
0
def insert_display_name(session, user: User, handle=None, when=None):
    """Store `user.name` as a `Name` row attached to an account.

    Returns None without touching the session when `user.name` is falsy. Otherwise the name is
    attached to `handle`, or, when `handle` is falsy, to the `Account` obtained through
    `get_or_create` for the user's external id. The row's `when` is set to `when`, or to `now()`
    when `when` is falsy, and the row is returned.
    """
    if not user.name:
        return None

    ext_id = twitter_external_user_id(user.id)
    account = handle if handle else get_or_create(session, Account, external_id=ext_id)

    name_row = get_or_create(session, Name, name=user.name, account=account)
    name_row.when = when if when else now()
    session.add(name_row)
    return name_row
Esempio n. 4
0
def insert_screen_name(session, user: User, handle=None, when=None):
    """Store the user's screen name, prefixed with "@", as a `Name` row attached to an account.

    Returns None without touching the session when `user.screen_name` is falsy. Otherwise the
    name is attached to `handle`, or, when `handle` is falsy, to the `Account` obtained through
    `get_or_create` for the user's external id. The row's `when` is set to `when`, or to `now()`
    when `when` is falsy, and the row is returned.
    """
    if not user.screen_name:
        return None

    ext_id = twitter_external_user_id(user.id)
    account = handle if handle else get_or_create(session, Account, external_id=ext_id)

    at_name = "@" + user.screen_name
    name_row = get_or_create(session, Name, name=at_name, account=account)
    name_row.when = when if when else now()
    session.add(name_row)
    return name_row
Esempio n. 5
0
    def helper(session):
        """Find or create the service called `name` and register its URLs.

        The service is matched on the lower-cased `name`. Its `more` mapping gets a
        "pretty_name" entry holding the original `name` when that key is missing, or is created
        with just that entry when `more` is empty. Each entry of `urls` is stored as a
        `schema.ServiceURL`, passed through `normalize_url` first when `normalize` is truthy.

        :param session: database session used for the lookups and inserts.
        :returns: the `schema.Service` row.
        """
        canonical = name.lower()
        service = session.query(schema.Service)\
                         .filter(schema.Service.name == canonical)\
                         .first()
        if not service:
            service = schema.get_or_create(session, schema.Service, name=canonical)

        if not service.more:
            service.more = {"pretty_name": name}
        elif "pretty_name" not in service.more:
            # NOTE(review): in-place mutation; confirm the column type tracks such changes.
            service.more["pretty_name"] = name

        for raw_url in urls:
            stored_url = normalize_url(raw_url) if normalize else raw_url
            schema.get_or_create(session,
                                 schema.ServiceURL,
                                 service=service,
                                 url=stored_url)
        return service
Esempio n. 6
0
def crawl_followers(session,
                    twitter_api,
                    crawl_user,
                    crawl_user_id=None,
                    when=None):
    """Record accounts that follow `crawl_user` and that are not fully stored yet.

    Iterates the IDs returned by `twitter_api.GetFollowerIDs`. An ID whose `Account` row exists
    and already has names is reported on stdout and skipped. Any other ID is fetched with
    `GetUser`, stored through `insert_user`, printed, and linked with a "follows"
    `AccountRelationship` whose left side is the new account and whose right side is `crawl_user`.

    `crawl_user_id` falls back to `crawl_user.id` when falsy; `when` falls back to `now()` when it
    is None. A `TwitterError` raised while handling one ID is printed and that ID is skipped.
    """
    target_id = crawl_user_id if crawl_user_id else crawl_user.id
    timestamp = now() if when is None else when

    for follower_id in twitter_api.GetFollowerIDs(user_id=target_id):
        try:
            ext_id = twitter_external_user_id(follower_id)
            known = session.query(Account).filter_by(external_id=ext_id).first()

            if known and known.names:
                aliases = ", ".join([entry.name for entry in known.names])
                print("Already know of user", follower_id, "AKA", aliases)
                continue

            # Hydrate the one user explicitly
            follower = insert_user(session, twitter_api.GetUser(user_id=follower_id))
            print(follower)
            get_or_create(session,
                          AccountRelationship,
                          left=follower,
                          right=crawl_user,
                          rel="follows",
                          when=timestamp)

        except twitter.error.TwitterError as err:
            print(follower_id, err)
Esempio n. 7
0
def insert_name(session, persona, name):
    """Attach `name` to `persona` via that persona's account on the null service.

    Looks up the persona's `schema.Account` on the service returned by `null_service(session)`.
    When there is none, one is created with an external id from `_nullsvc_fk(persona.id)` and
    committed. The `schema.Name` for (`name`, that account) is then fetched or created and
    returned.
    """
    placeholder_service = null_service(session)

    lookup = session.query(schema.Account).filter_by(service=placeholder_service,
                                                     persona=persona)
    carrier = lookup.first()

    if not carrier:
        carrier = schema.Account(service=placeholder_service,
                                 external_id=_nullsvc_fk(persona.id),
                                 persona=persona)
        session.add(carrier)
        session.commit()

    return get_or_create(session, schema.Name, name=name, account=carrier)
Esempio n. 8
0
    # Open a database session for this run.
    session = factory()

    # With exactly one command-line argument, crawl the user with that screen name;
    # otherwise crawl the user the API credentials belong to.
    if len(sys.argv) == 2:
        user_id = twitter_api.GetUser(screen_name=sys.argv[1]).id

    else:
        user_id = twitter_api.VerifyCredentials().id

    try:
        # One timestamp is shared by every relationship row written in this run.
        when = arrow.utcnow()

        # Followers: each returned user is the follower, the crawled user is the one followed.
        for user in twitter_api.GetFollowers(user_id=user_id):
            print(twitter.insert_user(session, user))
            schema.get_or_create(session,
                                 schema.TwitterFollows,
                                 follows_id=user_id,
                                 follower_id=user.id,
                                 when=when)

        # Friends: the crawled user is the follower, each returned user is the one followed.
        for user in twitter_api.GetFriends(user_id=user_id):
            print(twitter.insert_user(session, user))
            schema.get_or_create(session,
                                 schema.TwitterFollows,
                                 follower_id=user_id,
                                 follows_id=user.id,
                                 when=when)

    finally:
        # Runs on success and on error alike.
        # NOTE(review): only flush() is called here, never commit(); whether the rows persist
        # depends on insert_user / get_or_create committing -- confirm.
        session.flush()
        session.close()
Esempio n. 9
0
def insert_user(session, kb_user, persona=None, when=None, twitter_api=None):
  """Insert a Keybase user and link the accounts named in its proofs.

  The Keybase account is created through `_insert_user`, and `kb_user.username` is recorded as a
  `schema.Name` of it. Each entry of `kb_user.proofs` is then handled by `proof_type`:

  * "generic_web_site" proofs are skipped.
  * "twitter" proofs look for a Twitter account with the name "@<nametag>". A match has its
    persona merged with the Keybase account's persona. Without a match, the user is fetched via
    `twitter_api` (when given) and inserted under the Keybase account's persona. With neither, a
    warning is printed and the proof is skipped.
  * Any other proof gets a service named after the proof type, a normalized service URL, an
    account with external id "<proof_type>:<nametag>", and a name for the nametag; this branch
    commits.

  :param session: database session used for all queries and inserts.
  :param kb_user: Keybase user object exposing `id`, `username` and `proofs`.
  :param persona: optional persona passed through to `_insert_user`.
  :param when: optional timestamp passed through to `_insert_user`.
  :param twitter_api: optional Twitter client used to fetch unknown proved Twitter users.
  :returns: the Keybase account returned by `_insert_user`.
  """
  kb_account = _insert_user(session, kb_user.id,
                            persona=persona, when=when)

  schema.get_or_create(session, schema.Name,
                       name=kb_user.username,
                       account=kb_account)

  # `persona` is optional; fall back to the persona carried by the Keybase account so the
  # persona-less proved-account branch below cannot dereference None.
  link_persona = persona if persona is not None else kb_account.persona

  for proof in kb_user.proofs:
    if proof.proof_type == "generic_web_site":
      # FIXME: do something with this.
      continue

    elif proof.proof_type == "twitter":
      # FIXME: Try to find (or create) a Twitter user.
      #
      # It happens to be safe just to search by @-handle since we drive keybase from Twitter for
      # now. But that may not be safe in the future. Really this should push to a Twitter user
      # ingesting queue or something somewhere.
      proved_service = insert_twitter(session)
      twitter_account = session.query(schema.Account)\
                               .filter_by(service=proved_service)\
                               .join(schema.Name)\
                               .filter(schema.Name.name=="@{}".format(proof.nametag))\
                               .first()
      if not twitter_account and twitter_api:
        twitter_insert_user(session, twitter_api.GetUser(screen_name=proof.nametag),
                            persona=kb_account.persona)

      elif twitter_account:
        merge_left(session, kb_account.persona, twitter_account.persona)

      else:
        print("[WARN] Unable to link proved Twitter identity @{}".format(proof.nametag))
        continue

    else:
      # We make a bunch of assumptions about other services...
      proved_service = schema.get_or_create(session, schema.Service,
                                            name=proof.proof_type)

      # Insert the service's URL
      schema.get_or_create(session, schema.ServiceURL,
                           service=proved_service,
                           url=normalize_url(proof.service_url))

      external_id = ("%s:%s" % (proof.proof_type, proof.nametag))

      proved_account = session.query(schema.Account)\
                              .filter_by(service=proved_service,
                                         external_id=external_id)\
                              .first()

      if not proved_account:
        proved_account = schema.Account(service=proved_service,
                                        external_id=external_id,
                                        persona=kb_account.persona)

      elif proved_account.persona_id is not None:
        merge_left(session, kb_account.persona, proved_account.persona)
      else:
        # Previously `persona.id`, which raised AttributeError when no persona was passed.
        proved_account.persona_id = link_persona.id

      session.add(proved_account)

      nametag = schema.get_or_create(session, schema.Name,
                                     name=proof.nametag,
                                     account=proved_account)
      # NOTE(review): this is None when the caller passed no persona -- confirm that is intended.
      nametag.persona = persona
      session.add(nametag)
      session.commit()

    print("User", kb_account, "proved for service", proved_service)

  return kb_account
Esempio n. 10
0
def insert_tweet(session, twitter_api, tweet):
    """Insert a tweet (status using the old API terminology) into the backing datastore.

    This means inserting the original poster, inserting the service, inserting the post and
    inserting the post distribution.

    When a `Post` with the tweet's external id already exists (for example a dummy record), it
    is filled in with the poster, timestamp, text and raw dict, then committed and returned; no
    distributions or relationships are added on that path. Otherwise a new `Post` is created
    along with a "to" `PostDistribution` per mentioned user and "reply-to" / "quotes"
    `PostRelationship` rows where the tweet carries those ids.

    WARNING: this function does NOT recursively insert replied to tweets, or quoted tweets. It's
    expected that some other system handles walking the tree of tweets to deal with all that.
    This is, ultimately, to work around the garbage Twitter rate limits.

    :param session: database session used for all queries, inserts and commits.
    :param twitter_api: unused here; kept for interface compatibility.
    :param tweet: status object exposing `user`, `id`, `created_at`, `user_mentions`,
        `in_reply_to_status_id`, `quoted_status_id` and `AsDict()`.
    :returns: the inserted or updated `Post`, or None when the poster could not be stored as an
        `Account`.
    """

    _tw = insert_twitter(session)
    try:
        poster = tweet.user
        if not isinstance(poster, User):
            poster = User.NewFromJsonDict(poster)
        poster = insert_user(session, poster)
        # Raised explicitly rather than via `assert` so the check survives `python -O`.
        if not isinstance(poster, Account):
            raise AssertionError("insert_user did not return an Account")
    except AssertionError as e:
        print("Encountered exception", repr(e), traceback.format_exc(),
              "Processing tweet", tweet)
        return None

    # One key for both lookup and insert. The original looked up by `tweet.id` but inserted with
    # `tweet.id_str`; `_tweet_or_dummy` also keys dummies on the numeric id.
    external_id = twitter_external_tweet_id(tweet.id)

    dupe = session.query(Post)\
                  .filter_by(external_id=external_id)\
                  .first()

    created = aget(datetime.strptime(tweet.created_at, _tw_datetime_pattern))

    # There's a dummy record in place, flesh it out. We're in a monoid here.
    if dupe:
        dupe.poster = poster
        dupe.when = created
        dupe.text = _get_tweet_text(tweet)
        dupe.more = tweet.AsDict()
        session.add(dupe)
        session.commit()
        return dupe

    # We're inserting a new tweet here...
    post = Post(service=_tw,
                text=_get_tweet_text(tweet),
                external_id=external_id,
                poster=poster,
                when=created,
                more=tweet.AsDict())
    session.add(post)

    for user in tweet.user_mentions:
        get_or_create(session,
                      PostDistribution,
                      post=post,
                      recipient=insert_user(session, User(id=user.id)),
                      rel="to")

    if tweet.in_reply_to_status_id:
        get_or_create(session,
                      PostRelationship,
                      left=post,
                      right=_tweet_or_dummy(session,
                                            tweet.in_reply_to_status_id),
                      rel="reply-to")

    if tweet.quoted_status_id:
        get_or_create(session,
                      PostRelationship,
                      left=post,
                      right=_tweet_or_dummy(session,
                                            tweet.quoted_status_id),
                      rel="quotes")

    session.commit()

    return post
Esempio n. 11
0
def _tweet_or_dummy(session, external_id):
    """Return the `Post` for a tweet id, creating a placeholder row when none exists.

    The post is keyed on `twitter_external_tweet_id(external_id)` together with the service
    returned by `insert_twitter(session)`.
    """
    tweet_key = twitter_external_tweet_id(external_id)
    twitter_service = insert_twitter(session)
    return get_or_create(session, Post, external_id=tweet_key, service=twitter_service)