def predict(movie_id, nbmodel, imdb_db):
  """
  returns the predicted class labels for user-rating and
  gross-budget-mult by choosing, for each, the class with the
  highest accumulated score

  movie_id -- id handed to hydrate() to build the movie dict
  nbmodel  -- trained model; read as nbmodel[feat][id_][label] for each
              feat in FEATURES, with class priors under
              nbmodel['rating'][label] and nbmodel['bmult'][label]
  imdb_db  -- database handle handed to hydrate()

  result is ([true_rating, pred_rating], [true_bmult, pred_bmult]),
  where the true values come from the hydrated movie itself

  ids or labels that are missing from the model contribute 0, and the
  model is NOT modified by the lookup
  """

  # generate custom movie dict by ingesting features from the database
  movie = hydrate(movie_id, imdb_db, MAX_ACTORS)
  true_rating = movie['rating']
  true_bmult = movie['bmult']

  # initialize posteriors: one entry per class label, so the prior step
  # below still works for a movie that has no feature ids at all
  pos_rating = dict((br, 0) for br in BINS_RATING)
  pos_bmult = dict((bm, 0) for bm in BINS_BMULT)

  # accumulate the per-feature contributions
  # NOTE(review): the values are summed, so they are presumably
  # log-probabilities -- confirm against the training code
  for feat in FEATURES:
    for id_ in movie[feat]:
      # .get() rather than setdefault(): an unseen id must not be
      # written back into the trained model while predicting
      learned = nbmodel[feat].get(id_, {})
      for br in BINS_RATING:
        pos_rating[br] += learned.get(br, 0)
      for bm in BINS_BMULT:
        pos_bmult[bm] += learned.get(bm, 0)

  # add class priors
  for br in BINS_RATING:
    pos_rating[br] += nbmodel['rating'][br]
  for bm in BINS_BMULT:
    pos_bmult[bm] += nbmodel['bmult'][bm]

  # argmax over the labels; on ties the first label in dict iteration
  # order wins
  pred_rating = max(pos_rating, key=pos_rating.get)
  pred_bmult = max(pos_bmult, key=pos_bmult.get)

  return ([true_rating, pred_rating], [true_bmult, pred_bmult])
# Script prologue: open the IMDb SQL backend at db_uri and report progress
# on stdout. The progress message has no trailing newline, so it is flushed
# explicitly to get it out before the IMDb() call runs.
sys.stdout.write('Loading imdb.db... ')
sys.stdout.flush()
ia = imdb.IMDb('sql', uri=db_uri)
sys.stdout.write('[done]\n')

# all pruning will be done in movielist
# MOVIE_FILE is consumed one line at a time; each stripped line is used as
# a movie id. The handle is left open here because the first id is read
# now and the loop below keeps going until mov_id is the empty string.
# NOTE(review): because of strip(), a blank line inside the file also
# yields '' and would end that loop early -- confirm the file has none.
mlist = open(MOVIE_FILE, 'r')
mov_id = mlist.readline().strip()

max_budget = 0 # for normalization purposes

while mov_id != '':
    sys.stdout.write('Reading movie #' + mov_id + ': ')
    sys.stdout.flush()

    movie = hydrate(mov_id, ia, MAX_ACTORS)

    sys.stdout.write(movie['title'])
    sys.stdout.flush()

    # initialize feature vector
    current_fv = [0]*FV_LENGTH

    # generate output labels
    rating_labels.append(BINS_RATING.index(movie['rating']))
    bmult_labels.append(BINS_BMULT.index(movie['bmult']))
     
    # Populate feature vector
    '''
    for actor_id in iter(movie['actor']):
        current_fv[PERSON_OFFSET + person_fvid[actor_id]] = 1