def testRegressionWithRorSelectedNonalignedMultidimData(self):
     """Check that one run on non-aligned multidimensional data yields a float.

     The third argument handed to ``createTimeSeriesData`` is a list of ten
     random integers drawn from [100, 1000) (presumably one length per
     series -- confirm against the helper).  The run uses a
     ``DataNormalizer(0, 2)`` data mapping and reports the mean of element 1
     of whatever is passed to the output function.
     """
     series_lengths = [np.random.randint(100, 1000) for unused in range(10)]
     data = self.createTimeSeriesData(10, 3, series_lengths)
     static_vars = {
         "output_fncs": [lambda ds: np.mean(ds[1])],
         "data_mapping": DataNormalizer(0, 2),
     }
     result = RegressionExperiment().run(
         data,
         graph=False,
         disp=False,
         static_variables=static_vars,
     )
     assert type(result) == np.float64, "Run did not return a number."
 def testRegressionWithRorSelectedMultidimData(self):
     """Check that one run on multidimensional data yields a float.

     Only the output function is overridden: it reports the mean of
     element 1 of whatever the experiment passes to it.
     """
     data = self.createTimeSeriesData(150, 3, 50)
     static_vars = {"output_fncs": [lambda ds: np.mean(ds[1])]}
     result = RegressionExperiment().run(
         data,
         graph=False,
         disp=False,
         static_variables=static_vars,
     )
     assert type(result) == np.float64, "Run did not return a number."
 def testOneDimExperiment(self, repeats=0):
     """Test a one-dimensional regression experiment.

     One variable ("seg:allowable_overlap") is swept, so the run is expected
     to return a 1-D ``np.ndarray`` of ``np.float64`` with one entry per
     swept value.

     repeats -- forwarded unchanged to ``RegressionExperiment.run``.
     """
     exp = RegressionExperiment()
     data_set = self.createTimeSeriesData(100, 200)
     custom_vars = {
         "seg:validation_split": 0.3,
         "fg:generators": [gens.identity, gens.maximum, gens.minimum],
     }
     # Keep a direct handle on the swept values: indexing dict.values() only
     # works on Python 2 (views are not subscriptable on Python 3).
     overlaps = range(7, 9)
     indep_vars = {"seg:allowable_overlap": overlaps}
     val = exp.run(
         data_set,
         graph=False,
         disp=False,
         variables_under_test=indep_vars,
         static_variables=custom_vars,
         repeats=repeats,
     )
     assert type(val) == np.ndarray, "Run did not return a np.ndarray."
     assert len(val) == len(overlaps), "Run did not return a correct length array."
     assert type(val[0]) == np.float64, "Run did not return a list of floats."
 def testTwoDimExperiment(self, repeats=0):
     """Test a two-dimensional regression experiment.

     Two variables are swept, so the run is expected to return a 2-D
     ``np.ndarray`` of ``np.float64`` whose shape matches the number of
     values of each swept variable, in either axis order.

     repeats -- forwarded unchanged to ``RegressionExperiment.run``.
     """
     exp = RegressionExperiment()
     data_set = self.createTimeSeriesData(100, 200)
     custom_vars = {
         "seg:validation_split": 0.3,
         "fg:generators": [gens.identity, gens.maximum, gens.minimum],
     }
     # Name the swept values explicitly instead of indexing dict.values():
     # that is Python 2 only and depends on arbitrary dict ordering.
     overlaps = range(1, 3)
     predictor_lengths = [15, 20]
     indep_vars = {
         "seg:allowable_overlap": overlaps,
         "seg:predictor_length": predictor_lengths,
     }
     val = exp.run(
         data_set,
         graph=False,
         disp=False,
         variables_under_test=indep_vars,
         static_variables=custom_vars,
         repeats=repeats,
     )
     assert type(val) == np.ndarray, "Run did not return a np.ndarray."
     # The axis order of the result is not pinned down by this test, so the
     # transposed layout (reversed shape) is accepted as well.
     expected_shape = (len(overlaps), len(predictor_lengths))
     assert np.shape(val) in (expected_shape, expected_shape[::-1]), "Run did not return a correct size array."
     assert type(val[0][0]) == np.float64, "Run did not return an array of floats."
 def testNoExperimentCustomVars(self, repeats=0):
     """Run with every static variable overridden and expect a single float.

     No variables are swept; ``repeats`` is forwarded unchanged to
     ``RegressionExperiment.run``.
     """
     experiment = RegressionExperiment()
     series = self.createTimeSeriesData(100, 200)
     regressor_params = {"kernel_type": 1, "degree": 2}
     overrides = {
         "output_fncs": [np.min],
         "data_mapping": None,
         "seg:predictor_length": 10,
         "seg:predictee_length": 1,
         "seg:allowable_overlap": 1,
         "seg:validation_split": 0.3,
         "fg:generators": [gens.identity, gens.maximum, gens.minimum],
         "fg:threshold": 0.05,
         "reg:constructor": SupportVectorRegressor,
         "reg:training_params": regressor_params,
     }
     result = experiment.run(
         series,
         graph=False,
         disp=False,
         static_variables=overrides,
         repeats=repeats,
     )
     assert type(result) == np.float64, "Run did not return a number."
 def __init__(self, arguments=sys.argv):
     """Parse the command line and run regression experiments on DATA_FILE.

     arguments -- full argument vector in ``sys.argv`` layout: the first
                  element is the program name and is skipped, the rest are
                  options plus exactly one DATA_FILE path.

     The data file is loaded with the reader registered in ``self.readers``
     under the chosen ``--format``.  Without ``-i`` a single experiment with
     default settings is run; otherwise one experiment is run per ``-i``
     configuration file.  Each result is printed.  Invalid usage ends the
     process through ``OptionParser.error`` (``SystemExit``).
     """
     usage = 'usage: %prog [options] DATA_FILE'
     parser = OptionParser(usage=usage)

     # Input configuration
     parser.add_option('-i', '--input-file', dest='input_files', action='append',
                       type='string', metavar='FILE',
                       help='Configuration input file for a single run')

     # optparse requires ``choices`` to be a real list or tuple.
     parser.add_option('-f', '--format', dest='format', action='store',
                       choices=list(self.readers.keys()),
                       metavar='FMT', help='Data File Format', default='csv')

     # NOTE(review): output_file is parsed but not used anywhere in this method.
     parser.add_option('-o', '--output-file', dest='output_file', action='store', type='string',
                       default='regresion_experiment.csv', help='Output File', metavar='FILE')

     # store_false takes no value, so optparse rejects a ``type`` here.
     parser.add_option('-q', '--quiet', dest='verbose', action='store_false',
                       default=True, help='Quiet the run')

     # Skip the program name so that only real arguments are parsed.
     (options, positional) = parser.parse_args(arguments[1:])
     if len(positional) != 1:
         parser.error('exactly one DATA_FILE argument is required')
     datafile = positional[0]

     # optparse returns a Values object: options are attributes, not keys.
     data = self.readers[options.format](datafile)

     exp = RegressionExperiment()

     # input_files is None (not []) when no -i option was given.
     if not options.input_files:
         print('Running regression experiment using no input file')
         print(exp.run(data.quotes(), graph=options.verbose, disp=options.verbose))
     else:
         for ifn in options.input_files:
             print('Running regression experiment using input file: %s' % ifn)
             (static_config, variable_config) = deserializeConfiguration(ifn)
             print(exp.run(
                 data.quotes(),
                 graph=options.verbose,
                 disp=options.verbose,
                 static_variables=static_config,
                 variables_under_test=variable_config))
from minerva.regression import SupportVectorRegressor

# Location and name of the quote data used by this experiment script.
datadir = '/home/jon/Code/Minerva/test_data/'
datafile = 'test_data_nasdaq_a'
# Alternative data set:
# datafile = 'nasdaq_full_1990_to_2010'

# Variable swept by the experiment: predictor lengths 10, 12, ..., 48.
testvars = {'seg:predictor_length': range(10, 50, 2)}

# Settings held fixed for every run.
nontestvars = {
    # Five generator lists (presumably one per data row -- TODO confirm).
    'fg:generators': [
        [gens.identity],
        [gens.identity, gens.mean, gens.stdev],
        [gens.identity],
        [gens.identity],
        [gens.identity, gens.maximum, gens.minimum],
    ],
    'data_mapping': DataNormalizer(ror_divisor_row=1, volume_row=4),
    # Report the mean and the standard deviation of element 1 of the output.
    'output_fncs': [lambda v: np.mean(v[1]), lambda v: np.std(v[1])],
    'reg:constructor': SupportVectorRegressor,
    'reg:training_params': {'use_shrinking': '0'},
}
repeats = 10

if __name__ == '__main__':
    # Load the quotes and run the sweep with graphing and display enabled.
    data_file = QuoteReaderCsv(datadir + datafile)
    exp = RegressionExperiment()
    outdata = exp.run(
        data_file.quotes(),
        graph=True,
        disp=True,
        variables_under_test=testvars,
        static_variables=nontestvars,
        repeats=repeats,
    )
    
    
 def testNoExperimentDefaultVars(self, repeats=0):
     """Run with all default settings and expect a plain numeric result.

     ``repeats`` is forwarded unchanged to ``RegressionExperiment.run``.
     """
     series = self.createTimeSeriesData(100, 200)
     result = RegressionExperiment().run(
         series,
         graph=False,
         disp=False,
         repeats=repeats,
     )
     # Multiplying by zero gives zero only for a finite number.
     assert result * 0 == 0, "Run did not return a number."
 def testRegressionWithNonAlignedData(self):
     """Check that one run on non-aligned data yields a float.

     The second argument handed to ``createTimeSeriesData`` is a list of ten
     random integers drawn from [100, 1000) (presumably one length per
     series -- confirm against the helper).
     """
     series_lengths = [np.random.randint(100, 1000) for unused in range(10)]
     data = self.createTimeSeriesData(10, series_lengths)
     result = RegressionExperiment().run(data, graph=False, disp=False)
     assert type(result) == np.float64, "Run did not return a number."