Ejemplo n.º 1
0
def user_img(df, vocab_path, url, cache = None, loc_only = True, **kwargs):
  # Generate a dataset that doesn't group users
  # We are only interested in stuff with locations
  with open(vocab_path) as f_vocab:
    vocab = dict([ (x[1].strip(), x[0]) for x in enumerate(f_vocab.readlines()) ])

  extractor = ImageFeatureExtractor(vocab, url, **kwargs)
  df = df.loc[pd.notnull(df['airport'])]
  X = extractor._transform(zip(df['mid'], df['url']), cache)
  df['feat'] = [ x for x in X ]

  # Group our posts by userid and combine the bow and locations into a single feature
  data = { 'uid': [], 'feat': [], 'loc': [] }
  for k,vs in df.groupby('uid').groups.iteritems():
      locs = dict(df.loc[vs]['airport'].value_counts()).items()
      feats = reduce(lambda x,y: x + y, df.loc[vs]['feat'])
      # We are only interested in users with at least one location
      if len(locs) > 0:
          data['uid'].append(k)
          data['loc'].append(locs)
          data['feat'].append(feats)

  df_user = pd.DataFrame(data)

  if loc_only:
    df_user = df_user.loc[pd.notnull(df_user['loc'])]

  X = np.array([ x for x in df_user['feat'] ])
  y_loc = df_user['loc']
                     
  print "User Dataset: %s, %s" % (str(X.shape), str(y_loc.shape))

  extractor = ImageFeatureExtractor(vocab, url, **kwargs)

  return (X, y_loc, extractor)
Ejemplo n.º 2
0
def post_img(df, vocab_path, url, cache = None, **kwargs):
  # Generate a dataset that doesn't group users
  # We are only interested in stuff with locations

  with open(vocab_path) as f_vocab:
    vocab = dict([ (x[1].strip(), x[0]) for x in enumerate(f_vocab.readlines()) ])

  extractor = ImageFeatureExtractor(vocab, url, **kwargs)
  df_img = df.loc[pd.notnull(df['airport'])]
  X = extractor._transform(zip(df_img['mid'], df_img['url']), cache)
  y_loc = df_img['airport']

  print "Post Image Dataset"
  print X.shape
  print y_loc.shape
  print df_img.head()

  return (X, y_loc, extractor)
Ejemplo n.º 3
0
def post_img(df, vocab_path, url, cache=None, **kwargs):
    # Generate a dataset that doesn't group users
    # We are only interested in stuff with locations

    with open(vocab_path) as f_vocab:
        vocab = dict([(x[1].strip(), x[0])
                      for x in enumerate(f_vocab.readlines())])

    extractor = ImageFeatureExtractor(vocab, url, **kwargs)
    df_img = df.loc[pd.notnull(df['airport'])]
    X = extractor._transform(zip(df_img['mid'], df_img['url']), cache)
    y_loc = df_img['airport']

    print "Post Image Dataset"
    print X.shape
    print y_loc.shape
    print df_img.head()

    return (X, y_loc, extractor)
Ejemplo n.º 4
0
def user_img(df, vocab_path, url, cache=None, loc_only=True, **kwargs):
    # Generate a dataset that doesn't group users
    # We are only interested in stuff with locations
    with open(vocab_path) as f_vocab:
        vocab = dict([(x[1].strip(), x[0])
                      for x in enumerate(f_vocab.readlines())])

    extractor = ImageFeatureExtractor(vocab, url, **kwargs)
    df = df.loc[pd.notnull(df['airport'])]
    X = extractor._transform(zip(df['mid'], df['url']), cache)
    df['feat'] = [x for x in X]

    # Group our posts by userid and combine the bow and locations into a single feature
    data = {'uid': [], 'feat': [], 'loc': []}
    for k, vs in df.groupby('uid').groups.iteritems():
        locs = dict(df.loc[vs]['airport'].value_counts()).items()
        feats = reduce(lambda x, y: x + y, df.loc[vs]['feat'])
        # We are only interested in users with at least one location
        if len(locs) > 0:
            data['uid'].append(k)
            data['loc'].append(locs)
            data['feat'].append(feats)

    df_user = pd.DataFrame(data)

    if loc_only:
        df_user = df_user.loc[pd.notnull(df_user['loc'])]

    X = np.array([x for x in df_user['feat']])
    y_loc = df_user['loc']

    print "User Dataset: %s, %s" % (str(X.shape), str(y_loc.shape))

    extractor = ImageFeatureExtractor(vocab, url, **kwargs)

    return (X, y_loc, extractor)