Example #1
0
 def __init__(self):
   '''
   Build the w-pattern histogram container and concurso accessor, then run
   the whole drawing history and print the summary.
   '''
   self.wpattern_histogram = Dict2()
   self.concurso = ConcursoBase()
   self.n_slots = 5
   # the producer is built straight from a definer for n_slots tils over all dezenas
   self.tilprodbyNBase = TilProducerByNumberBase(TilDefiner(self.n_slots, self.concurso.N_DE_DEZENAS))
   self.run_history()
   self.summarize()
Example #2
0
 def __init__(self):
   '''
   Initialize the metric containers, load the concurso base, then compute
   the distances over history and print the summary.
   '''
   # spiked_tuple_list ends up like eg. [(1,40),(3,60),(36,47),(42,49)...]
   self.spiked_tuple_list = None
   self.distancesDict = {}
   self.frequencies_within_spike_range = {}
   self.concursoBase = ConcursoBase()
   self.process()
   self.summarize()
Example #3
0
class TilRAnalyzer(object):
  
  def __init__(self):
    self.wpattern_histogram = Dict2()
    self.concurso = ConcursoBase()
    self.n_slots = 5
    tildefiner = TilDefiner(self.n_slots, self.concurso.N_DE_DEZENAS)
    self.tilprodbyNBase = TilProducerByNumberBase(tildefiner)
    self.run_history()
    self.summarize()
  
  def run_history(self):
    for nDoConc in xrange(1301, self.concurso.get_total_concursos()+1):
      concurso = self.concurso.get_concurso_by_nDoConc(nDoConc)
      tilrobj = TilR(self.n_slots, concurso)
      wpatt = tilrobj.get_wpattern()
      wpatt_index = self.tilprodbyNBase.index(wpatt)
      print concurso, wpatt
      self.wpattern_histogram.add1_or_set1_to_key((wpatt_index, wpatt))

  def summarize(self):
    to_unpack = self.wpattern_histogram.items()
    index_and_pattern_tuple_list, n_occurrences = zip(*to_unpack )
    index_list, pattern_list = zip(*index_and_pattern_tuple_list)
    triple_list = zip(index_list, pattern_list, n_occurrences)  
    triple_list.sort( key = lambda x : x[2])
    triple_list.reverse()
    n_last_index = self.tilprodbyNBase.get_total() - 1
    for triple in triple_list:
      wpattern = triple[1]
      self.tilprodbyNBase.move_to_wpattern(wpattern)
      n_combinations = self.tilprodbyNBase.get_n_combinations(self.concurso) 
      print triple, n_combinations 
    print len(self.wpattern_histogram)
    print 'not happened'; c=0
    for index in xrange(n_last_index + 1):
      if index not in index_list:
        c+=1
        print '>>>%d' %c, index, self.tilprodbyNBase.at(index), '::',
Example #4
0
class DistanceAllOccurrred(object):
  '''
  The idea behind this metric is to measure the number of concursos, from a past concurso onwards, all dozens appear.
  Eg. It took 41 concursos at the beginning of Megasena for all 60 dozens appear (at least one, which happens for at least one dozen)
  As of this time, max "spike-distance", as it is called, is 87. Min is 27.  Average is around 41/42.
  '''
  
  def __init__(self):
    self.distancesDict = {}
    self.frequencies_within_spike_range = {}
    self.spiked_tuple_list = None  # eg. [(1,40),(3,60),(36,47),(42,49)...]
    self.concursoBase = ConcursoBase()
    self.process()
    #self.distancesDict = distancesDict    
    self.summarize()
    
  def process(self):
    FINISH_PROCESSING = False
    for parked_nDoConc in range(1, self.concursoBase.get_total_concursos() + 1):
      # print 'Processing DistanceAllOccurrred metric:', parked_nDoConc, 
      concurso = self.concursoBase.get_concurso_by_nDoConc(parked_nDoConc) # concurso_parked
      all_dezenas_frequency_dict = DezenasVolanteFrequencyDict(self.concursoBase.N_DE_DEZENAS_NO_VOLANTE)
      all_dezenas_frequency_dict.add_1_to_values_given_key_list(concurso.get_dezenas())
      distance = 1
      while 1:
        if not all_dezenas_frequency_dict.is_there_still_a_zero_among_values():
          # print parked_nDoConc, ':: n of concs that span occurences of all dozens = ', distance
          self.distancesDict[parked_nDoConc] = distance 
          self.frequencies_within_spike_range[parked_nDoConc] = all_dezenas_frequency_dict.extract_frequencies_in_order()
          print ' >>>>>>>>> freq within spike', self.frequencies_within_spike_range[parked_nDoConc]
          break
        concurso = concurso.get_next()
        if concurso == None:
          FINISH_PROCESSING = True
          self.frequencies_within_spike_range[parked_nDoConc] = all_dezenas_frequency_dict.extract_frequencies_in_order()
          break
        all_dezenas_frequency_dict.add_1_to_values_given_key_list(concurso.get_dezenas())
        # print 'all_dezenas_dict', all_dezenas_dict 
        distance += 1
    if FINISH_PROCESSING:
      return

  def get_last_nDoConc_that_has_distance(self):
    pass
    nDoConcWithDistanceTupleList = self.distancesDict.items()
    # nDoConcWithDistanceTupleList.sort( key lambda x,y: )
    return nDoConcWithDistanceTupleList

  def compact_distanceDict_into_spikes(self):
    '''
    This metric generally happens in a decreasing manner, one by one, until a "spike"
     # eg. [(1,39),(3,59),(36,46),(42,48)...]
     The example (eg) can be read as such:
     - conc 1 needs another 38 concs to have all dozens happen at least once, ie, have every one of them occurring
     - conc 2, though it's not there, needs another 37 (ie, 38-1)
     - conc 3 "spikes" needing another 58 concs
     - from conc 4 to conc 35, distance diminishes one by one, ie, (4,58),(5,57),...,(35,27)
     - conc 36 "spikes" again
    '''
    self.spiked_tuple_list = []
    frequencies_within_spike_range_to_retain = {}
    if self.distancesDict.items() == 0:
      return
    nDoConcWithDistanceTupleList = self.distancesDict.items()
    first_spiked = nDoConcWithDistanceTupleList[0]
    self.spiked_tuple_list = [first_spiked]
    for i, nDoConcWithDistanceTuple in enumerate(nDoConcWithDistanceTupleList[1:]):
      previous_distance = nDoConcWithDistanceTupleList[i][1]
      distance = nDoConcWithDistanceTuple[1] 
      if previous_distance != distance + 1:  # CAUTION: the "i" here is tricky, because index starts looping at 1. whereas i (from enumerate()) starts at 0
        # this means: a spike happened, so register it
        self.spiked_tuple_list.append(nDoConcWithDistanceTuple)
        nDoConc = nDoConcWithDistanceTuple[0]
        frequencies_within_spike_range_to_retain[nDoConc] = self.frequencies_within_spike_range[nDoConc][:] # must be a hard copy, for right-side object will be reassigned
    # self.frequencies_within_spike_range will have only the spikes, not all history
    self.frequencies_within_spike_range = frequencies_within_spike_range_to_retain          
    self.generate_stats()
      
  def generate_stats(self):
    self.spiked_nDoConcs, self.spiked_distances = zip(*self.get_spiked_tuple_list())
    self.spiked_distances = numpy.array(self.spiked_distances)
    self.max_distance = max(self.spiked_distances)
    self.min_distance = min(self.spiked_distances)
    self.avg_distance = self.spiked_distances.mean()
    self.std_distance = self.spiked_distances.std()

  def get_spiked_tuple_list(self, reprocess=False):
    if self.spiked_tuple_list != None and not reprocess:
      return self.spiked_tuple_list
    self.compact_distanceDict_into_spikes()
    return self.spiked_tuple_list

  def summarize(self):
    print self.distancesDict
    print self.get_spiked_tuple_list()
    for nDoConc in self.frequencies_within_spike_range: print nDoConc, self.frequencies_within_spike_range[nDoConc] 
    print 'max', self.max_distance
    print 'min', self.min_distance
    print 'avg', self.avg_distance
    print 'std', self.std_distance
    print 'strides', self.spiked_distances.strides
    #print 'cumsum', self.spiked_distances.cumsum()
    print 'len spiked list', len(self.get_spiked_tuple_list())