Ejemplo n.º 1
0
 def map_init(self):
     """Initialize totals, the data-sufficiency mask, and target series.

     Loads the pickle named by ``self.params['total_file']`` and sets:

     * ``self.totals`` -- the pickle's ``'projects'`` entry.
     * ``self.mask`` -- the pickle's ``'mask'`` entry when present;
       otherwise a ``math_.Date_Vector`` of booleans built by calling
       ``tweet.is_enough`` on every element of
       ``self.totals['t@']['series']``.
     * ``self.targets`` -- a list of dicts with keys ``'name'``,
       ``'series'`` and ``'mask'``, one per item found in each spreadsheet
       listed in ``self.params['input_sss']``.

     Calls ``u.abort`` when fewer than half of the mask entries are set.
     """
     pickle_ = u.pickle_load(self.params['total_file'])
     self.totals = pickle_['projects']
     # Compute masks (find days with insufficient data)
     try:
         # use precomputed mask if available (Wikipedia)
         self.mask = pickle_['mask']
     except KeyError:
         # compute a mask for Twitter
         series = self.totals['t@']['series']
         enough = [
             tweet.is_enough(series.date(i),
                             series[i],
                             sample_rate=self.params['tw_sample_rate'])
             for i in xrange(len(series))
         ]
         # Builtin bool is the dtype the old np.bool alias stood for; the
         # alias itself is deprecated in newer NumPy releases.
         self.mask = math_.Date_Vector(series.first_day,
                                       np.array(enough, dtype=bool))
     days_total = len(self.mask)
     days_ok = self.mask.sum()
     if (days_ok < 0.5 * days_total):
         # The mask marks days that passed the check, so the low-data count
         # to report is the remainder, not the sum itself. (The precomputed
         # mask is assumed to use the same polarity, since the same
         # threshold test is applied to it.)
         u.abort('too many low-data days (%d of %d); check sample rate?' %
                 (days_total - days_ok, days_total))
     # Read target time series
     self.targets = list()
     short_names = u.without_common_prefix(self.params['input_sss'])
     for (sn, ln) in zip(short_names, self.params['input_sss']):
         e = ssheet.Excel(file_=ln)
         # The quoted file part of the name is the same for every item in
         # this spreadsheet, so build it once.
         prefix = urllib.quote_plus(u.without_ext(sn, '.xls'))
         for (item_name, (series, series_mask)) in e.data.iteritems():
             self.targets.append({
                 'name': '%s:%s' % (prefix, urllib.quote_plus(item_name)),
                 'series': series,
                 'mask': series_mask
             })
Ejemplo n.º 2
0
 def map_init(self):
    """Initialize totals, the data-sufficiency mask, and target series.

    Loads the pickle named by ``self.params['total_file']`` and sets:

    * ``self.totals`` -- the pickle's ``'projects'`` entry.
    * ``self.mask`` -- the pickle's ``'mask'`` entry when present;
      otherwise a ``math_.Date_Vector`` of booleans built by calling
      ``tweet.is_enough`` on every element of
      ``self.totals['t@']['series']``.
    * ``self.targets`` -- a list of dicts with keys ``'name'``,
      ``'series'`` and ``'mask'``, one per item found in each spreadsheet
      listed in ``self.params['input_sss']``.

    Calls ``u.abort`` when fewer than half of the mask entries are set.
    """
    pickle_ = u.pickle_load(self.params['total_file'])
    self.totals = pickle_['projects']
    # Compute masks (find days with insufficient data)
    try:
       # use precomputed mask if available (Wikipedia)
       self.mask = pickle_['mask']
    except KeyError:
       # compute a mask for Twitter
       series = self.totals['t@']['series']
       enough = [tweet.is_enough(series.date(i),
                                 series[i],
                                 sample_rate=self.params['tw_sample_rate'])
                 for i in xrange(len(series))]
       # Builtin bool is the dtype the old np.bool alias stood for; the
       # alias itself is deprecated in newer NumPy releases.
       self.mask = math_.Date_Vector(series.first_day,
                                     np.array(enough, dtype=bool))
    days_total = len(self.mask)
    days_ok = self.mask.sum()
    if (days_ok < 0.5 * days_total):
       # The mask marks days that passed the check, so the low-data count
       # to report is the remainder, not the sum itself. (The precomputed
       # mask is assumed to use the same polarity, since the same
       # threshold test is applied to it.)
       u.abort('too many low-data days (%d of %d); check sample rate?'
               % (days_total - days_ok, days_total))
    # Read target time series
    self.targets = list()
    short_names = u.without_common_prefix(self.params['input_sss'])
    for (sn, ln) in zip(short_names, self.params['input_sss']):
       e = ssheet.Excel(file_=ln)
       # The quoted file part of the name is the same for every item in
       # this spreadsheet, so build it once.
       prefix = urllib.quote_plus(u.without_ext(sn, '.xls'))
       for (item_name, (series, series_mask)) in e.data.iteritems():
          self.targets.append({ 'name':   '%s:%s' % (prefix,
                                                     urllib.quote_plus(item_name)),
                                'series': series,
                                'mask':   series_mask })
Ejemplo n.º 3
0
def graph_load():
    """Load the pickled wiki graph and canonicalize its inner keys.

    Reads ``<args.in_>/articles/wiki-graph.pkl.gz`` with ``u.pickle_load``.
    For every top-level key, the mapping stored under it is replaced by a
    new dict whose keys have been passed through
    ``timeseries.name_url_canonicalize``; values are carried over unchanged.
    If two keys canonicalize to the same string, the one iterated last wins.

    Returns the loaded (and updated) top-level mapping.
    """
    graph = u.pickle_load(args.in_ + '/articles/wiki-graph.pkl.gz')
    for root in graph.keys():
        canonical = dict()
        for (url, dist) in graph[root].items():
            canonical[timeseries.name_url_canonicalize(url)] = dist
        # Rebinding an existing key does not change the mapping's size, so
        # doing it while walking the keys is safe.
        graph[root] = canonical
    return graph
Ejemplo n.º 4
0
def graph_load():
   """Load the pickled wiki graph and canonicalize its inner keys.

   Reads ``<args.in_>/articles/wiki-graph.pkl.gz`` with ``u.pickle_load``.
   For every top-level key, the mapping stored under it is replaced by a
   new dict whose keys have been passed through
   ``timeseries.name_url_canonicalize``; values are carried over unchanged.
   If two keys canonicalize to the same string, the one iterated last wins.

   Returns the loaded (and updated) top-level mapping.
   """
   graph = u.pickle_load(args.in_ + '/articles/wiki-graph.pkl.gz')
   for root in graph.keys():
      neighbors = graph[root].items()
      # Rebinding an existing key does not change the mapping's size, so
      # doing it while walking the keys is safe.
      graph[root] = dict((timeseries.name_url_canonicalize(url), dist)
                         for (url, dist) in neighbors)
   return graph
Ejemplo n.º 5
0
 def unshrink_from_disk(self, dir_, model=False, results=False):
    """Reload the requested attributes from pickles under ``dir_``.

    At least one of ``model`` / ``results`` must be true. For each one
    requested, the matching attribute (``self.model`` / ``self.results``)
    is reloaded only when it currently holds a
    ``u.Deleted_To_Save_Memory`` instance; otherwise it is left untouched.
    The pickles are read from ``<dir_>/model.<self.i>`` and
    ``<dir_>/results.<self.i>`` respectively.
    """
    assert (model or results)
    if model:
       if isinstance(self.model, u.Deleted_To_Save_Memory):
          model_path = '%s/model.%d' % (dir_, self.i)
          self.model = u.pickle_load(model_path)
    if results:
       if isinstance(self.results, u.Deleted_To_Save_Memory):
          results_path = '%s/results.%d' % (dir_, self.i)
          self.results = u.pickle_load(results_path)
Ejemplo n.º 6
0
 def unshrink_from_disk(self, dir_, model=False, results=False):
     """Reload the requested attributes from pickles under ``dir_``.

     At least one of ``model`` / ``results`` must be true. For each one
     requested, the matching attribute (``self.model`` / ``self.results``)
     is reloaded only when it currently holds a
     ``u.Deleted_To_Save_Memory`` instance; otherwise it is left untouched.
     The pickles are read from ``<dir_>/model.<self.i>`` and
     ``<dir_>/results.<self.i>`` respectively.
     """
     assert (model or results)
     if model:
         if isinstance(self.model, u.Deleted_To_Save_Memory):
             model_path = '%s/model.%d' % (dir_, self.i)
             self.model = u.pickle_load(model_path)
     if results:
         if isinstance(self.results, u.Deleted_To_Save_Memory):
             results_path = '%s/results.%d' % (dir_, self.i)
             self.results = u.pickle_load(results_path)