def get_last_accuracy_in_sequence(estimator, n_votes_to_sample, texts, vote_lists, truths, X, text_similarity, idx=None, return_final=False, *args):
    """Randomly sample votes and re-calculate estimates.

    Draws one shuffled vote for a random document per step until
    n_votes_to_sample votes are known, then runs `estimator` on the
    accumulated votes and returns its accuracy against `truths`.

    Returns None when a chosen document has no votes left, or when the
    estimator raises (the traceback is printed).
    """
    random.seed()
    unknown_votes = copy_and_shuffle_sublists(vote_lists)
    known_votes = [[] for _ in unknown_votes]

    # This is a crowdsourcing procedure: draw one vote per iteration.
    for index in xrange(n_votes_to_sample):
        # Draw one vote for a random document.
        updated_doc_idx = random.randrange(len(vote_lists))
        if not unknown_votes[updated_doc_idx]:
            # We ran out of votes for this document, disregard this sequence.
            return None
        vote = unknown_votes[updated_doc_idx].pop()
        known_votes[updated_doc_idx].append(vote)

    # Calculate the estimates from all sampled votes and score them.
    try:
        estimates = estimator(texts, known_votes, X, text_similarity, *args)
        return get_accuracy(estimates, truths)
    except Exception as e:
        traceback.print_exc()
        return None
def get_last_accuracy_in_sequence(estimator, n_votes_to_sample, texts, vote_lists, truths, X, text_similarity, idx=None, return_final=False, *args):
    """Randomly sample votes and re-calculate estimates.

    Draws one shuffled vote for a random document per step until
    n_votes_to_sample votes are known, then runs `estimator` on the
    accumulated votes and returns its accuracy against `truths`.

    Returns None when a chosen document has no votes left, or when the
    estimator raises (the traceback is printed).
    """
    random.seed()
    unknown_votes = copy_and_shuffle_sublists(vote_lists)
    known_votes = [[] for _ in unknown_votes]

    # This is a crowdsourcing procedure: draw one vote per iteration.
    for index in xrange(n_votes_to_sample):
        # Draw one vote for a random document.
        updated_doc_idx = random.randrange(len(vote_lists))
        if not unknown_votes[updated_doc_idx]:
            # We ran out of votes for this document, disregard this sequence.
            return None
        vote = unknown_votes[updated_doc_idx].pop()
        known_votes[updated_doc_idx].append(vote)

    # Calculate the estimates from all sampled votes and score them.
    try:
        estimates = estimator(texts, known_votes, X, text_similarity, *args)
        return get_accuracy(estimates, truths)
    except Exception as e:
        traceback.print_exc()
        return None
def sample_and_minimise_entropy(final_estimator, estimator, n_votes_to_sample, texts, vote_lists, truths, X, text_similarity, idx=None, return_final=False, *args):
    """Active learning scenario, where we decide at each step what document to pick.

    Then we ask a worker to label it for us. Our hypothesis is that it should
    be better than randomly asking for labels of some documents. We're only
    picking docs that minimise the uncertainty in the system.

    Returns the accuracy of `final_estimator` on the accumulated votes, or
    None when the final estimation fails (traceback is printed).
    """
    random.seed()
    unknown_votes = copy_and_shuffle_sublists(vote_lists)
    known_votes = [[] for _ in unknown_votes]

    random_vote_taken = False
    doc_to_be_sampled = None

    # Initialisation: keep trying random documents until one vote is taken.
    while not random_vote_taken:
        updated_doc_idx = random.randrange(len(vote_lists))
        if not unknown_votes[updated_doc_idx]:
            continue
        # Note that the vote is not replaced here for now.
        # Since we're dealing with a low budget scenario, this should be fine.
        vote = unknown_votes[updated_doc_idx].pop()
        known_votes[updated_doc_idx].append(vote)
        random_vote_taken = True

    for index in xrange(n_votes_to_sample):
        if doc_to_be_sampled is not None:
            # Take a worker vote for the document chosen in the last round.
            try:
                vote = unknown_votes[doc_to_be_sampled].pop()
                known_votes[doc_to_be_sampled].append(vote)
            except IndexError:
                vote = None
                # All real votes exhausted for this document.
                # TODO(review): falling back to a random vote - can we do better?
                known_votes[doc_to_be_sampled].append(bool(random.randint(0, 1)))
            labels = estimator(texts, known_votes, X, text_similarity, *args)
            known_votes = add_lambda_votes_to_vote_lists(doc_to_be_sampled, known_votes, labels)

        last_iter_entropy = float("inf")
        # Pick the document that minimises the expected system entropy.
        for doc_index, doc_vote_list in enumerate(known_votes):
            copy_known_votes = copy.deepcopy(known_votes)

            # Hypothetically add a positive (relevant) vote to this document.
            # At this point, we can either add full votes, or a portion of
            # those. Consider GP, we can add the probability it outputs.
            copy_known_votes[doc_index].append(True)
            labels = estimator(texts, copy_known_votes, X, text_similarity, *args)
            known_votes_plus_labels = add_lambda_votes_to_vote_lists(doc_index, copy_known_votes, labels)
            relevance_label_added_system_entropy = get_system_entropy(known_votes_plus_labels)

            # Now hypothetically add a negative (non-relevant) vote instead.
            copy_known_votes[doc_index].pop(-1)
            copy_known_votes[doc_index].append(False)
            labels = estimator(texts, copy_known_votes, X, text_similarity, *args)
            known_votes_plus_labels = add_lambda_votes_to_vote_lists(doc_index, copy_known_votes, labels)
            non_relevance_label_added_system_entropy = get_system_entropy(known_votes_plus_labels)

            # Average entropy of the system over both hypothetical outcomes.
            doc_avg_system_entropy = (relevance_label_added_system_entropy + non_relevance_label_added_system_entropy) / 2

            # Restore the state of the copied vote list.
            copy_known_votes[doc_index].pop()

            if doc_avg_system_entropy < last_iter_entropy:
                doc_to_be_sampled = doc_index
                last_iter_entropy = doc_avg_system_entropy

    # Consume the document selection made in the final round.
    if doc_to_be_sampled is not None:
        try:
            vote = unknown_votes[doc_to_be_sampled].pop()
            known_votes[doc_to_be_sampled].append(vote)
        except IndexError:
            vote = None
            # TODO(review): falling back to a random vote - can we do better?
            known_votes[doc_to_be_sampled].append(bool(random.randint(0, 1)))
        labels = estimator(texts, known_votes, X, text_similarity, *args)
        known_votes = add_lambda_votes_to_vote_lists(doc_to_be_sampled, known_votes, labels)

    try:
        estimates = final_estimator(texts, known_votes, X, text_similarity, *args)
        return get_accuracy(estimates, truths)
    except Exception as e:
        traceback.print_exc()
        return None
def sample_gp_variance_min_entropy(estimator, n_votes_to_sample, texts, vote_lists, truths, X, text_similarity, idx=None, return_final=False, *args): """ Randomly sample votes and re-calculate estimates. """ random.seed() unknown_votes = copy_and_shuffle_sublists(vote_lists) known_votes = [ [] for _ in unknown_votes ] estimates = [None for _ in vote_lists] accuracy_sequence = [None] * n_votes_to_sample curr_doc_selected = None # This is a crowdsourcing procedure for index in xrange(n_votes_to_sample): # Counter # sys.stderr.write(str(index)+'\n') max_variance_seen = 0 # Draw one vote for a random document if curr_doc_selected is None: updated_doc_idx = random.randrange(len(vote_lists)) if not unknown_votes[updated_doc_idx]: # We ran out of votes for this document, diregard this sequence return None vote = unknown_votes[updated_doc_idx].pop() known_votes[updated_doc_idx].append(vote) else: #print "Selected doc number ", curr_doc_selected try: vote = unknown_votes[curr_doc_selected].pop() except IndexError: # We ran out of votes for this document, disregard this. 
return None known_votes[curr_doc_selected].append(vote) print known_votes estimates = estimator(texts, known_votes, X, text_similarity, *args) #print estimates #sorted_estimates = sorted(enumerate(estimates), key=lambda x: x[1][1]) estimates = list(estimates) #print len(estimates) num_votes_step = sum(map(lambda x: bool(x), known_votes))/ len(unknown_votes) print num_votes_step possibilities = filter(lambda x: len(known_votes[x[0]]) < num_votes_step + 1 ,enumerate(estimates)) #print possibilities, len(possibilities), list(enumerate(estimates)) #Just need to get the document index, which is element[0] for enumerate(estimates) try: curr_doc_selected = get_best_sample(possibilities,[x[1][1] for x in possibilities])[0] #curr_doc_selected = get_weighted_sample(possibilities,[x[1][1] for x in possibilities])[0] except: print "Excepted" curr_doc_selected = get_best_sample(enumerate(estimates),[x[1] for x in estimates])[0] #curr_doc_selected = get_weighted_sample(enumerate(estimates),[x[1] for x in estimates])[0] print curr_doc_selected #curr_doc_selected = random.choice([element[0] for element in sorted_estimates][:5]) #for doc_index, (label, variance) in enumerate(estimates): #print doc_index, label, variance #if variance > max_variance_seen: # max_variance_seen = variance # curr_doc_selected = doc_index # Calculate all the estimates try: estimates = estimator(texts, known_votes, X, text_similarity, *args) labels = [x[0] for x in estimates] #sys.stderr.write('Success\n') return get_accuracy(labels, truths) except Exception, e: traceback.print_exc() #sys.stdout.write('Fail\n') return None
def sample_and_minimise_entropy(final_estimator, estimator, n_votes_to_sample, texts, vote_lists, truths, X, text_similarity, idx=None, return_final=False, *args):
    """Active learning scenario, where we decide at each step what document to pick.

    Then we ask a worker to label it for us. Our hypothesis is that it should
    be better than randomly asking for labels of some documents. We're only
    picking docs that minimise the uncertainty in the system.

    Returns the accuracy of `final_estimator` on the accumulated votes, or
    None when the final estimation fails (traceback is printed).
    """
    random.seed()
    unknown_votes = copy_and_shuffle_sublists(vote_lists)
    known_votes = [[] for _ in unknown_votes]

    random_vote_taken = False
    doc_to_be_sampled = None

    # Initialisation: keep trying random documents until one vote is taken.
    while not random_vote_taken:
        updated_doc_idx = random.randrange(len(vote_lists))
        if not unknown_votes[updated_doc_idx]:
            continue
        # Note that the vote is not replaced here for now.
        # Since we're dealing with a low budget scenario, this should be fine.
        vote = unknown_votes[updated_doc_idx].pop()
        known_votes[updated_doc_idx].append(vote)
        random_vote_taken = True

    for index in xrange(n_votes_to_sample):
        if doc_to_be_sampled is not None:
            # Take a worker vote for the document chosen in the last round.
            try:
                vote = unknown_votes[doc_to_be_sampled].pop()
                known_votes[doc_to_be_sampled].append(vote)
            except IndexError:
                vote = None
                # All real votes exhausted for this document.
                # TODO(review): falling back to a random vote - can we do better?
                known_votes[doc_to_be_sampled].append(bool(random.randint(0, 1)))
            labels = estimator(texts, known_votes, X, text_similarity, *args)
            known_votes = add_lambda_votes_to_vote_lists(doc_to_be_sampled, known_votes, labels)

        last_iter_entropy = float("inf")
        # Pick the document that minimises the expected system entropy.
        for doc_index, doc_vote_list in enumerate(known_votes):
            copy_known_votes = copy.deepcopy(known_votes)

            # Hypothetically add a positive (relevant) vote to this document.
            # At this point, we can either add full votes, or a portion of
            # those. Consider GP, we can add the probability it outputs.
            copy_known_votes[doc_index].append(True)
            labels = estimator(texts, copy_known_votes, X, text_similarity, *args)
            known_votes_plus_labels = add_lambda_votes_to_vote_lists(doc_index, copy_known_votes, labels)
            relevance_label_added_system_entropy = get_system_entropy(known_votes_plus_labels)

            # Now hypothetically add a negative (non-relevant) vote instead.
            copy_known_votes[doc_index].pop(-1)
            copy_known_votes[doc_index].append(False)
            labels = estimator(texts, copy_known_votes, X, text_similarity, *args)
            known_votes_plus_labels = add_lambda_votes_to_vote_lists(doc_index, copy_known_votes, labels)
            non_relevance_label_added_system_entropy = get_system_entropy(known_votes_plus_labels)

            # Average entropy of the system over both hypothetical outcomes.
            doc_avg_system_entropy = (relevance_label_added_system_entropy + non_relevance_label_added_system_entropy) / 2

            # Restore the state of the copied vote list.
            copy_known_votes[doc_index].pop()

            if doc_avg_system_entropy < last_iter_entropy:
                doc_to_be_sampled = doc_index
                last_iter_entropy = doc_avg_system_entropy

    # Consume the document selection made in the final round.
    if doc_to_be_sampled is not None:
        try:
            vote = unknown_votes[doc_to_be_sampled].pop()
            known_votes[doc_to_be_sampled].append(vote)
        except IndexError:
            vote = None
            # TODO(review): falling back to a random vote - can we do better?
            known_votes[doc_to_be_sampled].append(bool(random.randint(0, 1)))
        labels = estimator(texts, known_votes, X, text_similarity, *args)
        known_votes = add_lambda_votes_to_vote_lists(doc_to_be_sampled, known_votes, labels)

    try:
        estimates = final_estimator(texts, known_votes, X, text_similarity, *args)
        return get_accuracy(estimates, truths)
    except Exception as e:
        traceback.print_exc()
        return None
def sample_gp_variance_min_entropy(estimator, n_votes_to_sample, texts, vote_lists, truths, X, text_similarity, idx=None, return_final=False, *args): """ Randomly sample votes and re-calculate estimates. """ random.seed() unknown_votes = copy_and_shuffle_sublists(vote_lists) known_votes = [[] for _ in unknown_votes] estimates = [None for _ in vote_lists] accuracy_sequence = [None] * n_votes_to_sample curr_doc_selected = None # This is a crowdsourcing procedure for index in xrange(n_votes_to_sample): # Counter # sys.stderr.write(str(index)+'\n') max_variance_seen = 0 # Draw one vote for a random document if curr_doc_selected is None: updated_doc_idx = random.randrange(len(vote_lists)) if not unknown_votes[updated_doc_idx]: # We ran out of votes for this document, diregard this sequence return None vote = unknown_votes[updated_doc_idx].pop() known_votes[updated_doc_idx].append(vote) else: #print "Selected doc number ", curr_doc_selected try: vote = unknown_votes[curr_doc_selected].pop() except IndexError: # We ran out of votes for this document, disregard this. 
return None known_votes[curr_doc_selected].append(vote) print known_votes estimates = estimator(texts, known_votes, X, text_similarity, *args) #print estimates #sorted_estimates = sorted(enumerate(estimates), key=lambda x: x[1][1]) estimates = list(estimates) #print len(estimates) num_votes_step = sum(map(lambda x: bool(x), known_votes)) / len(unknown_votes) print num_votes_step possibilities = filter( lambda x: len(known_votes[x[0]]) < num_votes_step + 1, enumerate(estimates)) #print possibilities, len(possibilities), list(enumerate(estimates)) #Just need to get the document index, which is element[0] for enumerate(estimates) try: curr_doc_selected = get_best_sample( possibilities, [x[1][1] for x in possibilities])[0] #curr_doc_selected = get_weighted_sample(possibilities,[x[1][1] for x in possibilities])[0] except: print "Excepted" curr_doc_selected = get_best_sample(enumerate(estimates), [x[1] for x in estimates])[0] #curr_doc_selected = get_weighted_sample(enumerate(estimates),[x[1] for x in estimates])[0] print curr_doc_selected #curr_doc_selected = random.choice([element[0] for element in sorted_estimates][:5]) #for doc_index, (label, variance) in enumerate(estimates): #print doc_index, label, variance #if variance > max_variance_seen: # max_variance_seen = variance # curr_doc_selected = doc_index # Calculate all the estimates try: estimates = estimator(texts, known_votes, X, text_similarity, *args) labels = [x[0] for x in estimates] #sys.stderr.write('Success\n') return get_accuracy(labels, truths) except Exception, e: traceback.print_exc() #sys.stdout.write('Fail\n') return None
def sample_min_entropy_kde(estimator_dict, start_idx, n_votes_to_sample, texts, vote_lists, truths, X, text_similarity, idx=None, return_final=False, *args): """ Randomly sample votes and re-calculate estimates. """ random.seed() unknown_votes = copy_and_shuffle_sublists(vote_lists) accuracy_sequences = {} for estimator_name, estimator_args in estimator_dict.iteritems(): estimator, args = estimator_args accuracy_sequences[estimator_name] = [] known_votes = [ [] for _ in unknown_votes ] estimates = [None for _ in vote_lists] curr_doc_selected = None document_idx_vote_seq = [] document_vote_counts = [ 0 for _ in vote_lists ] for votes_required in range(start_idx): min_vote_doc_idxs = get_indexes_of_smallest_elements(document_vote_counts) updated_doc_idx = random.choice(min_vote_doc_idxs) document_vote_counts[updated_doc_idx] += 1 # Randomly pick a vote for this document vote_idx = random.randrange(len(vote_lists[updated_doc_idx])) vote = vote_lists[updated_doc_idx][vote_idx] document_idx_vote_seq.append( (updated_doc_idx, vote ) ) for document_idx, vote in document_idx_vote_seq: known_votes[document_idx].append(vote) print n_votes_to_sample # This is a crowdsourcing procedure for index in xrange(n_votes_to_sample): print "Sampling vote number ", index # Draw one vote for a random document if curr_doc_selected is None: updated_doc_idx = random.randrange(len(vote_lists)) if not unknown_votes[updated_doc_idx]: # We ran out of votes for this document, diregard this sequence return None vote = random.choice(unknown_votes[updated_doc_idx]) known_votes[updated_doc_idx].append(vote) else: #print "Selected doc number ", curr_doc_selected try: vote = random.choice(unknown_votes[curr_doc_selected]) except IndexError: # We ran out of votes for this document, disregard this sequence return None known_votes[curr_doc_selected].append(vote) print "Known votes ", known_votes estimates = estimator(texts, known_votes, X, text_similarity, *args) #Just need to get the document index, 
which is element[0] for enumerate(estimates) estimates = list(estimates) #print len(estimates) num_votes_step = sum(map(lambda x: len(x), known_votes))/ len(unknown_votes) #print num_votes_step possibilities = filter(lambda x: len(known_votes[x[0]]) < 1 + num_votes_step ,enumerate(estimates)) #print possibilities, len(possibilities), list(enumerate(estimates)) #Just need to get the document index, which is element[0] for enumerate(estimates) try: #curr_doc_selected = get_best_sample(possibilities,[x[1][1] for x in possibilities])[0] #curr_doc_selected = get_weighted_sample(possibilities,[x[1][1] for x in possibilities])[0] curr_doc_selected = get_min_entropy_sample(known_votes, possibilities) except: #print "Excepted" #curr_doc_selected = get_best_sample(enumerate(estimates),[x[1] for x in estimates])[0] #curr_doc_selected = get_weighted_sample(enumerate(estimates),[x[1] for x in estimates])[0] curr_doc_selected = get_min_entropy_sample(known_votes, enumerate(estimates)) #print "Curr_doc_selected ", curr_doc_selected #objects = list(enumerate(estimates)) #print "estimates ", objects #curr_doc_selected = get_weighted_sample(objects,[x[1][1] for x in objects]) #print curr_doc_selected # Calculate all the estimates estimates = estimator(texts, known_votes, X, text_similarity, *args) #labels = [x[0] for x in estimates] try: accuracy = get_accuracy(estimates, truths) accuracy_sequences[estimator_name].append(accuracy) except Exception, e: accuracy_sequences[estimator_name].append(None)
def sample_min_entropy(estimator_dict, n_votes_to_sample, texts, vote_lists, truths, X, text_similarity, idx=None, return_final=False, *args): """ Randomly sample votes and re-calculate estimates. """ random.seed() unknown_votes = copy_and_shuffle_sublists(vote_lists) accuracy_sequences = {} for estimator_name, estimator_args in estimator_dict.iteritems(): estimator, args = estimator_args accuracy_sequences[estimator_name] = [] known_votes = [ [] for _ in unknown_votes ] estimates = [None for _ in vote_lists] curr_doc_selected = None document_idx_vote_seq = [] document_vote_counts = [ 0 for _ in vote_lists ] #Randomly sampling 30 votes first for avoiding bias etc. """for votes_required in range(0): min_vote_doc_idxs = get_indexes_of_smallest_elements(document_vote_counts) updated_doc_idx = random.choice(min_vote_doc_idxs) document_vote_counts[updated_doc_idx] += 1 # Randomly pick a vote for this document vote_idx = random.randrange(len(vote_lists[updated_doc_idx])) vote = vote_lists[updated_doc_idx][vote_idx] document_idx_vote_seq.append( (updated_doc_idx, vote ) ) for document_idx, vote in document_idx_vote_seq: known_votes[document_idx].append(vote) estimates = estimator(texts, known_votes, X, text_similarity, *args) try: accuracy = get_accuracy(estimates, truths) accuracy_sequences[estimator_name].append(accuracy) except Exception, e: print "Pooped" return None print "known_votes ", known_votes""" # This is a crowdsourcing procedure, random sampling end for index in xrange(n_votes_to_sample): print "Sampling vote number ", index # Draw one vote for a random document if curr_doc_selected is None: print "Sampling random vote yall" updated_doc_idx = random.randrange(len(vote_lists)) if not unknown_votes[updated_doc_idx]: # We ran out of votes for this document, diregard this sequence return None vote = random.choice(unknown_votes[updated_doc_idx]) known_votes[updated_doc_idx].append(vote) else: print "Selected doc number ", curr_doc_selected try: vote = 
random.choice(unknown_votes[curr_doc_selected]) except IndexError: # We ran out of votes for this document, disregard this sequence return None known_votes[curr_doc_selected].append(vote) #print "Known votes ", known_votes if not index % 50: # While doing density based sampling, we don't really need to do label aggregation at each point. # Still doing it at every 50th vote, just to keep this code around for other # sampling methods like entropy based. estimates = estimator(texts, known_votes, X, text_similarity, *args) estimates = list(estimates) print estimates, len(estimates) num_votes_step = sum(map(lambda x: len(x), known_votes))/ len(unknown_votes) print 'num_vote_step ', num_votes_step possibilities = filter(lambda x: len(known_votes[x[0]]) < 1 + num_votes_step ,enumerate(known_votes)) #print possibilities, len(possibilities), list(enumerate(estimates)) #Just need to get the document index, which is element[0] for enumerate(estimates) try: #curr_doc_selected = get_best_sample(possibilities,[x[1][1] for x in possibilities])[0] #curr_doc_selected = get_weighted_sample(possibilities,[x[1][1] for x in possibilities])[0] #curr_doc_selected = get_min_entropy_sample(known_votes, possibilities) #curr_doc_selected = get_density_based_best_sample(X, known_votes, possibilities) #curr_doc_selected = get_covariance_based_best_sample(X, known_votes, possibilities) curr_doc_selected = get_mutual_information_based_best_sample(X, known_votes, possibilities) except Exception as e: print "Excepted", e #curr_doc_selected = get_best_sample(enumerate(estimates),[x[1] for x in estimates])[0] #curr_doc_selected = get_weighted_sample(enumerate(estimates),[x[1] for x in estimates])[0] #curr_doc_selected = get_min_entropy_sample(known_votes, enumerate(estimates)) curr_doc_selected = get_density_based_best_sample(X, known_votes, enumerate(estimates)) #print "Curr_doc_selected ", curr_doc_selected #objects = list(enumerate(estimates)) #print "estimates ", objects #curr_doc_selected = 
get_weighted_sample(objects,[x[1][1] for x in objects]) #print curr_doc_selected # Calculate all the estimates estimates = estimator(texts, known_votes, X, text_similarity, *args) #labels = [x[0] for x in estimates] try: accuracy = get_accuracy(estimates, truths) accuracy_sequences[estimator_name].append(accuracy) except Exception, e: return None