def cross_validate(self, Z, r, not_missing, 
                    folds = 4, ncount = 11, 
                    penalty_min = 0., penalty_max = 1000.,
                    xv_chunk = 1, xv_chunks = 1,
                    print_iter = False,
                    max_epochs = None,
                    reps = 1):
     T = Z.shape[0]
     # Grid of (prior precision, mean precision) penalty pairs; keep only
     # the slice assigned to this worker (xv_chunk out of xv_chunks).
     uni_pen = np.linspace(penalty_min, penalty_max, ncount)
     pens = [(x, y) for x in uni_pen for y in uni_pen]
     pens = util.chunkify(pens, xv_chunks)[xv_chunk - 1]
     xvN = len(pens)
     means = np.zeros((reps, folds, xvN))
     variances = np.zeros((reps, folds, xvN))
     iter_sharpe = np.zeros(means.size)
     iter_params = means.size * [0]
     # Partition the T time indices into `folds` contiguous blocks.
     samples = [[tt for tt in range(T)
                 if int(folds * tt / T) == fold]
                for fold in range(folds)]
     iterii = 0
     for jj in range(reps):
         for fold in range(folds):
             if folds > 1:
                 # Train on every fold except `fold`; validate on `fold`.
                 fold_sample = np.concatenate([samp for kk, samp in enumerate(samples) if kk != fold])
                 xv_sample = samples[fold]
             else:
                 # With a single fold, train and validate on everything.
                 fold_sample = list(range(T))
                 xv_sample = fold_sample
             for ii, penvals in enumerate(pens):
                 prior_prec, mean_prec = penvals
                 self.restart()
                 self.set_prior_precision(prior_prec, mean_prec)
                 # Train on the in-sample folds, then score the held-out
                 # fold with learning_rate = 0. (evaluation only, no updates).
                 self._fit(Z, r, not_missing, iter_indices = fold_sample, print_iter = print_iter)
                 xv_fit = self._fit(Z, r, not_missing, learning_rate = 0., iter_indices = xv_sample,
                                    print_iter = print_iter, max_epochs = max_epochs)
                 means[jj, fold, ii] = xv_fit['mean']
                 variances[jj, fold, ii] = xv_fit['variance']
                 iter_sharpe[iterii] = xv_fit['sharpe_ratio']
                 iter_params[iterii] = xv_fit['model_parameters']
                 if print_iter:
                     print(('Cross Validation:', fold, ii, means[jj, fold, ii] / np.sqrt(variances[jj, fold, ii])))
                 iterii += 1
     # Average over reps and folds, then pick the penalty pair with the
     # best out-of-sample Sharpe ratio.
     means = np.mean(np.mean(means, axis = 0), axis = 0)
     variances = np.mean(np.mean(variances, axis = 0), axis = 0)
     sharpes = means / np.sqrt(variances)
     best_prec, best_mean_prec = pens[np.argmax(sharpes)]
     # One row per penalty pair: (prior_prec, mean_prec, sharpe).
     xv_results = np.zeros((xvN, len(pens[0]) + 1))
     xv_results[:, :-1] = np.array(pens)
     xv_results[:, -1] = sharpes
     # Restore the precision settings and parameters of the best iteration.
     self.set_prior_precision(best_prec, best_mean_prec)
     self.set_model_variables(iter_params[np.argmax(iter_sharpe)])
     return xv_results
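
Every example on this page leans on a chunkify helper, but the call signatures vary: a positional chunk count here, num_chunks= in #2, chunk_size= in #3, and positional second arguments again in #4 and #5 (where the Elevation API's 200-location limit suggests a chunk *size*). A minimal sketch covering both keyword conventions, assuming the usual split-a-sequence semantics; the real helpers in these codebases may differ:

def chunkify(seq, num_chunks=None, chunk_size=None):
    """Split seq either into num_chunks pieces of near-equal size,
    or into pieces of exactly chunk_size (last piece may be shorter)."""
    seq = list(seq)
    if chunk_size is not None:
        return [seq[i:i + chunk_size]
                for i in range(0, len(seq), chunk_size)]
    q, r = divmod(len(seq), num_chunks)  # first r pieces get one extra item
    out, start = [], 0
    for i in range(num_chunks):
        end = start + q + (1 if i < r else 0)
        out.append(seq[start:end])
        start = end
    return out

# chunkify(range(7), num_chunks=3) -> [[0, 1, 2], [3, 4], [5, 6]]
# chunkify(range(7), chunk_size=3) -> [[0, 1, 2], [3, 4, 5], [6]]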
Example #2
def get_messages(user_email=None, tag=None, process_id=None):
    imap = None
    msg_ids = []
    try:
        imap = IMAPHelper()
        imap.oauth1_2lo_login(user_email=user_email)
        try:
            if tag:
                logging.info('Creating label [%s]', tag)
                imap.create_label(tag)
            msg_ids = imap.list_messages(only_from_trash=True)
        except Exception as e:
            logging.exception('Error creating label or retrieving messages for '
                              'user [%s]', user_email)
            processed_user = ProcessedUser.get_by_id(user_email)
            if not processed_user:
                processed_user = ProcessedUser(id=user_email, ok_count=0,
                                               error_count=0,
                                               total_count=list(),
                                               error_description=list())
            processed_user.error_description.append(e.message)
            processed_user.put()

            return []
    except Exception as e:
        logging.exception('Authentication or connection problem for user '
                          '[%s]', user_email)
        processed_user = ProcessedUser.get_by_id(user_email)
        if not processed_user:
            processed_user = ProcessedUser(id=user_email, ok_count=0,
                                           error_count=0,
                                           total_count=list(),
                                           error_description=list())
        processed_user.error_description.append(e.message)
        processed_user.put()

        return []
    finally:
        if imap:
            imap.close()
    # Assuming IMAP connection was OK
    if len(msg_ids) > 0:
        counter.load_and_increment_counter('%s_total_count' % user_email,
                                           delta=len(msg_ids),
                                           namespace=str(process_id))
        return chunkify(msg_ids, num_chunks=constants.USER_CONNECTION_LIMIT)
    else:
        counter.load_and_increment_counter('%s_total_count' % user_email,
                                           delta=0,
                                           namespace=str(process_id))

    return []
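
The two except-branches above duplicate the ProcessedUser bookkeeping almost verbatim (the inner one even contained the email/user_email slip fixed above). A hypothetical helper, not part of the original module, could collapse both into one call:

def _record_user_error(user_email, message):
    # Hypothetical refactor: upsert the ProcessedUser entity and append
    # the error, mirroring both except-branches in get_messages().
    processed_user = ProcessedUser.get_by_id(user_email)
    if not processed_user:
        processed_user = ProcessedUser(id=user_email, ok_count=0,
                                       error_count=0,
                                       total_count=list(),
                                       error_description=list())
    processed_user.error_description.append(message)
    processed_user.put()

# each branch then reduces to:
#     _record_user_error(user_email, e.message)
#     return []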
Example #3
def schedule_user_move(user_email=None, tag=None, move_process_key=None,
                       domain_name=None):
    if domain_name:
        try:
            primary_domain = PrimaryDomain.get_or_create(
                domain_name=domain_name)
            if primary_domain.credentials:
                email_settings_helper = EmailSettingsHelper(
                    credentials_json=primary_domain.credentials,
                    domain=domain_name,
                    refresh_token=primary_domain.refresh_token
                )
                email_settings_helper.enable_imap(user_email)
                logging.info('IMAP enabled for [%s]',
                             user_email)
            else:
                logging.warn('Error trying to enable IMAP for user [%s]',
                             user_email)
        except Exception:
            logging.exception('Domain [%s] is not authorized, IMAP not enabled',
                              domain_name)

    for chunk_ids in get_messages(user_email=user_email, tag=tag,
                                  process_id=move_process_key.id()):
        if len(chunk_ids) > 0:
            new_chunk_ids = []
            chunk_sizes = []
            for chunk in chunkify(chunk_ids,
                                  chunk_size=constants.MESSAGE_BATCH_SIZE):
                new_chunk_ids.append([chunk[0], chunk[-1]])
                chunk_sizes.append(len(chunk))
            logging.info('Scheduling user [%s] messages move', user_email)
            deferred.defer(move_messages, user_email=user_email, tag=tag,
                           chunk_ids=new_chunk_ids,
                           process_id=move_process_key.id(),
                           chunk_sizes=chunk_sizes)
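
Note the payload trick: instead of shipping every message ID to the deferred task, each batch is compressed to its [first, last] pair plus an explicit size, keeping the task arguments small; move_messages presumably expands the ranges again, which only round-trips if IDs within a batch are contiguous. With toy IDs (using the chunk_size form of the sketch under Example #1):

chunk_ids = [101, 102, 103, 104, 105, 106, 107]
new_chunk_ids, chunk_sizes = [], []
for chunk in chunkify(chunk_ids, chunk_size=3):
    new_chunk_ids.append([chunk[0], chunk[-1]])
    chunk_sizes.append(len(chunk))
# new_chunk_ids == [[101, 103], [104, 106], [107, 107]]
# chunk_sizes   == [3, 3, 1]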
Example #4
def train_dA(train_x, learning_rate=0.1, training_epochs=500, batch_size=30,
             chunk=3, corruption_level=.3, rel_hidden=.6):
    # Split each training row into chunk-sized pieces; every piece
    # becomes its own training example for the denoising autoencoder.
    xs = train_x.get_value(borrow=True)
    real_train = []
    for x in xs:
        real_train += chunkify(x, chunk)
    train_x = theano.shared(numpy.asarray(real_train,
                                          dtype=theano.config.floatX), borrow=True)
    n_train_batches = train_x.get_value(borrow=True).shape[0] // batch_size
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))
    image_size = train_x.get_value(borrow=True).shape[1]
    da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
            n_visible=image_size, n_hidden=int(rel_hidden*image_size), chunk=chunk)

    cost, updates = da.get_cost_updates(corruption_level=corruption_level,
                                        learning_rate=learning_rate)

    train_da = theano.function([index], cost, updates=updates,
         givens={x: train_x[index * batch_size:(index + 1) * batch_size]})

    for epoch in xrange(training_epochs):
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))
        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)        

    image = PIL.Image.fromarray(tile_raster_images(
        X=da.W.get_value(borrow=True).T,
        img_shape=(chunk, chunk), tile_shape=(int((rel_hidden*image_size)**.5+1),int((rel_hidden*image_size)**.5+1)),
        tile_spacing=(1, 1)))
    image.save('filters_corruption_30.png')
    return da
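
The positional chunkify(x, chunk) call is ambiguous here, but the filter plot at the end constrains it: for tile_raster_images with img_shape=(chunk, chunk) to be consistent, each piece must hold chunk*chunk values, i.e. one chunk-by-chunk patch of the rasterized image. A hypothetical illustration under that reading, using the chunk_size form of the sketch under Example #1:

# one 18-pixel row with chunk = 3 -> 3x3 = 9-pixel patches (assumed semantics)
row = list(range(18))
patches = chunkify(row, chunk_size=3 * 3)
# -> [[0, 1, ..., 8], [9, 10, ..., 17]]: two training vectors per row, so
# image_size == n_visible == 9 and the learned filters tile as 3x3 images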
Example #5
    def elevation(self, pts, elevation_refs, interpolation_refs, console):
        '''Gets elevation from reference data.'''
        def ElevationURL(url, num, outof, attempt):
            '''Get Elevation data from a url.'''
            if attempt == self.max_failed_queries:
                console.add('elevation', num, outof, False,
                            'Quitting this query. Data will be inaccurate')
                return None, False

            current_time = time.time()
            if current_time < self.next_query_time:  #before earliest possible query time
                time.sleep(self.next_query_time -
                           current_time)  #wait until next query time

            self.next_query_time = current_time + 0.2  #set the next time a query can be fired to 0.2 seconds from now

            try:  #a ton of things can go wrong!
                data = json.load(urlopen(url))
                if data['status'] == 'OK':  #nothing wrong with query
                    console.add('elevation', num, outof)
                    return data, True
                else:  #if an error, pass it on
                    console.add(
                        'elevation', num, outof, False,
                        'Problem with query or data: ' + data['status'])
                    print 'Elevation Error (if)'
                    return ElevationURL(url, num, outof, attempt + 1)
            except:  #if an error, pass it on
                console.add('elevation', num, outof, False,
                            'Problem with connection')
                print 'Elevation Error'
                return ElevationURL(url, num, outof, attempt + 1)

        api_url = 'https://maps.googleapis.com/maps/api/elevation/json?locations=enc:'
        requests = []
        chunks = list(util.chunkify(elevation_refs, 200))
        for chunk in chunks:
            polyline = Polyline.encode('|'.join(
                [pts[a[0]].string for a in chunk]))
            requests.append((polyline, chunk, api_url + polyline +
                             '&key=AIzaSyCnHT7IpJu0O7n-apLNW8iKkW_rTIuANuE'))

        request_num = 1
        for poly, refs_list, url in requests:
            data, success = ElevationURL(url, request_num, len(requests), 0)
            if not success:
                return None, None, False
            request_num += 1
            if data:
                for res, refs in zip(data['results'], refs_list):
                    for ref in refs:
                        pts[ref].elevation = res['elevation']

        for to_int, ref1, ref2 in interpolation_refs:
            pt = pts[to_int[0]]
            pt1 = pts[ref1[0]]
            pt2 = pts[ref2[0]]
            dist1 = pt.pt.distanceTo(pt1.pt)
            dist2 = pt.pt.distanceTo(pt2.pt)
            total = dist1 + dist2
            slope = (pt2.elevation - pt1.elevation) / total
            elevation = pt1.elevation + dist1 * slope
            for pt in to_int:
                pts[pt].elevation = elevation

        return pts, requests, True
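
ElevationURL retries by calling itself with attempt + 1, which is harmless for a small max_failed_queries but grows the call stack on every failure. The same throttle-and-retry behavior can also be written as a loop; a sketch of an iterative equivalent (same console messages, same 0.2 s pacing), not the original code:

def elevation_url(self, url, num, outof, console):
    for attempt in range(self.max_failed_queries):
        now = time.time()
        if now < self.next_query_time:       # too early, honor the pacing
            time.sleep(self.next_query_time - now)
        self.next_query_time = now + 0.2     # at most ~5 queries per second
        try:
            data = json.load(urlopen(url))
            if data['status'] == 'OK':
                console.add('elevation', num, outof)
                return data, True
            console.add('elevation', num, outof, False,
                        'Problem with query or data: ' + data['status'])
        except Exception:
            console.add('elevation', num, outof, False,
                        'Problem with connection')
    console.add('elevation', num, outof, False,
                'Quitting this query. Data will be inaccurate')
    return None, False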
Example #6
 def fit_epoch(self, T, batch_T,
               learning_rate = None, 
               print_iter = False,
               iter_indices = None):
     if not self.active_session:
         self.initialize_session()
     if iter_indices is None:
         obs = T
         iter_indices = np.arange(obs)
     else:
         obs = len(iter_indices)
     np.random.shuffle(iter_indices)
     iter_batches = util.chunkify(iter_indices, max(len(iter_indices) // batch_T, 1))
     itern = len(iter_batches)
     average_ll = 0
     tan_exrets = []
     average_mean = 0
     average_vari = 0
     average_exrets = 0
     sse = 0.
     ssr = 0.
     for step, indices in enumerate(iter_batches):
         
         Tval = len(indices)
         sample_ratio = Tval / obs
         feed_dict = {self.T: Tval,  
                      self.sample_ratio: sample_ratio,
                      self.use_indices: np.array(indices).astype(np.int32)}
         
         if learning_rate is not None:
             feed_dict.update({self.learning_rate: learning_rate})
         else:
             feed_dict.update({self.learning_rate: self.current_learning_rate})
         
         session_output = self.session.run([self.optimizer, self.loss, 
                                            self.tangency_exrets, 
                                            self.average_mean,
                                            self.average_exrets,
                                            self.sse, self.ssr,
                                            self.vari_mean], 
                                             feed_dict = feed_dict)
         _, nll, rr, amn, aex, ssep, ssrp, vm = session_output
         
         ### print the log-likelihood stuff
         #self.print_ll(feed_dict)
         
         average_ll -= nll / obs
         tan_exrets += list(rr)
         average_mean += amn / itern
         average_exrets += aex / itern
         average_vari += vm / itern
         ssr += ssrp
         sse += ssep
     
     ### save data to output
     mean_rr = np.mean(tan_exrets)
     var_rr = np.var(tan_exrets)
     average_rr = mean_rr / np.sqrt(var_rr)
     # learning_rate == 0. marks the evaluation pass used by cross_validate
     desc_str = 'Cross Validation' if learning_rate == 0. else 'Training'
     self.epoch += 1
     if print_iter:
         print('Epoch %d' % (self.epoch))
         print('\t%s LL at step %d: %f' % (desc_str, self.epoch, average_ll))
         print('\tR2: %f' % (1. - sse / ssr))
         print('\t%s Sharpe at step %d: %f' % (desc_str, self.epoch, average_rr))
         print('\t%s Average mu at step %d: %f' % (desc_str, self.epoch, average_mean))
         print('\t%s Average variance at step %d: %f' % (desc_str, self.epoch, average_vari))
         print('\t%s Average excess returns at step %d: %f' % (desc_str, self.epoch, average_exrets))
         print('\t\t\t%s Learning Rate at step %d: %f' % (desc_str, self.epoch, self.current_learning_rate))
     
     outdata = {'loglikelihood': average_ll, 
                'sharpe_ratio': average_rr,
                'learning_rate': self.current_learning_rate,
                'mean': mean_rr,
                'variance': var_rr}
     
     return outdata
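
Note how the Sharpe ratio is formed: tangency excess returns from every minibatch are pooled into tan_exrets and the mean and variance are taken once over the pooled sample, rather than averaging per-batch Sharpe ratios, which would overweight small final batches. In isolation, with hypothetical values:

import numpy as np

# toy per-batch tangency excess returns (illustrative only)
batch_returns = [np.array([0.011, -0.020, 0.034]), np.array([0.002, 0.015])]
tan_exrets = np.concatenate(batch_returns)                  # pool across batches
sharpe = np.mean(tan_exrets) / np.sqrt(np.var(tan_exrets))  # as in fit_epoch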