def kfold_cross_validate(directory, k):
    print 'Beginning k-fold cross validation...'
    subset_list = breakup_training(directory, k)
    results = [[] for i in xrange(10)]  # outer array = each model, inner array = results per iteration
    # loop through each subset list, run training + validation
    for i in xrange(len(subset_list)):
        # split the training docs into training + validation
        validation_set = set(subset_list[i])
        remaining = subset_list[:i] + subset_list[i + 1:]
        train_set = set([index for subset in remaining for index in subset])

        # no resampling
        hmm_model_0 = hmm.HMM(directory, train_set, smooth_trans=True, smooth_emiss=True, resample=False)    # smooth both
        hmm_model_1 = hmm.HMM(directory, train_set, smooth_trans=False, smooth_emiss=True, resample=False)   # smooth emission only
        hmm_model_2 = hmm.HMM(directory, train_set, smooth_trans=True, smooth_emiss=False, resample=False)   # smooth transition only
        hmm_model_3 = hmm.HMM(directory, train_set, smooth_trans=False, smooth_emiss=False, resample=False)  # no smoothing
        results[0].append(cross_validate_hmm(directory, hmm_model_0, validation_set))
        results[1].append(cross_validate_hmm(directory, hmm_model_1, validation_set))
        results[2].append(cross_validate_hmm(directory, hmm_model_2, validation_set))
        results[3].append(cross_validate_hmm(directory, hmm_model_3, validation_set))

        # with resampling
        hmm_model_4 = hmm.HMM(directory, train_set, smooth_trans=True, smooth_emiss=True, resample=True)    # smooth both
        hmm_model_5 = hmm.HMM(directory, train_set, smooth_trans=False, smooth_emiss=True, resample=True)   # smooth emission only
        hmm_model_6 = hmm.HMM(directory, train_set, smooth_trans=True, smooth_emiss=False, resample=True)   # smooth transition only
        hmm_model_7 = hmm.HMM(directory, train_set, smooth_trans=False, smooth_emiss=False, resample=True)  # no smoothing
        results[4].append(cross_validate_hmm(directory, hmm_model_4, validation_set))
        results[5].append(cross_validate_hmm(directory, hmm_model_5, validation_set))
        results[6].append(cross_validate_hmm(directory, hmm_model_6, validation_set))
        results[7].append(cross_validate_hmm(directory, hmm_model_7, validation_set))

        # baseline with and without resampling
        baseline_1 = baseline.Baseline(directory, train_set, resample=False)
        baseline_2 = baseline.Baseline(directory, train_set, resample=True)
        results[8].append(cross_validate_baseline(directory, baseline_1, validation_set))
        results[9].append(cross_validate_baseline(directory, baseline_2, validation_set))

        # status update
        print str((float(i + 1) / k) * 100) + '% complete'

    # return the avg results tuple for each model that we train/test across all k-fold cross-validation rounds
    return [get_avg_results(model_results, k) for model_results in results]
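
# A hedged sketch (not part of the original module) of what the assumed helper
# `get_avg_results` could look like, if each fold contributes one tuple of numeric
# metrics per model; the real helper and its result format are assumptions here.
def get_avg_results(model_results, k):
    # model_results holds one metrics tuple per fold; average each metric over the k folds
    num_metrics = len(model_results[0])
    return tuple(sum(fold[m] for fold in model_results) / float(k)
                 for m in range(num_metrics))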
def __init__(self, model_type=None, model_params=""):
    if model_type is None:
        self.model = None
        self.vectorizer = None
        return
    if model_type == "baseline":
        self.model = baseline.Baseline()
    elif model_type == "svm":
        self.model = eval("SVC(" + model_params + ")")
        #self.model = SVC(kernel="linear")
    elif model_type == "knn":
        self.model = eval("KNeighborsClassifier(" + model_params + ")")
        #self.model = KNeighborsClassifier(n_neighbors=3)
    elif model_type == "naive_bayes":
        self.model = MultinomialNB()
    elif model_type == "decision_trees":
        self.model = DecisionTreeClassifier(random_state=0)
    elif model_type == "log_regression":
        self.model = eval("LogisticRegression(" + model_params + ")")
    elif model_type == "perceptron":
        self.model = eval("Perceptron(" + model_params + ")")
    else:
        print >> sys.stderr, "Model of type " + model_type + " is not supported."
    self.vectorizer = DictVectorizer(sparse=True)
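
# Example usage of the constructor above, assuming it belongs to a wrapper class
# (call it `Classifier`; the class name is not shown in this snippet). Note that
# `model_params` is spliced into an eval() call verbatim, so it must come from a
# trusted source:
#     clf = Classifier(model_type="svm", model_params="kernel='linear', C=1.0")
#     knn = Classifier(model_type="knn", model_params="n_neighbors=3")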
def baselineClassify(train, test):
    """
    The function returns the tagging prediction by the baseline system
    as a dictionary.
    """
    model = baseline.Baseline(train)
    lines = preprocess.readFile(test)
    prediction = {'PER': [], 'LOC': [], 'ORG': [], 'MISC': []}
    lineNum = 1
    for line in lines:
        if (lineNum % 3) == 1:  # line with tokens
            tokens = line.strip().split()
            tags = BLdebug(model.assignTags(tokens))
        elif (lineNum % 3) == 0:  # line with indexes
            indexes = line.strip().split()
            preClass = None
            firstIdx = None
            lastIdx = None
            NEcontinues = False
            for i in range(len(tags)):
                bioTag = tags[i][:1]
                if bioTag == 'B':
                    if NEcontinues:  # previous entity ends
                        prediction[preClass].append(firstIdx + '-' + lastIdx)
                    preClass = tags[i][2:]
                    firstIdx = indexes[i]
                    lastIdx = indexes[i]
                    NEcontinues = True
                elif bioTag == 'I':
                    curClass = tags[i][2:]
                    assert NEcontinues and curClass == preClass
                    lastIdx = indexes[i]
                else:  # bioTag == 'O'
                    if NEcontinues:
                        prediction[preClass].append(firstIdx + '-' + lastIdx)
                        preClass = None
                        firstIdx = None
                        lastIdx = None
                        NEcontinues = False
        lineNum += 1
    return prediction
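
# Worked example of the BIO decoding above (hypothetical token indexes):
#     tags    = ['B-PER', 'I-PER', 'O', 'B-LOC', 'O']
#     indexes = ['17', '18', '19', '20', '21']
# yields prediction['PER'] == ['17-18'] and prediction['LOC'] == ['20-20'].
# Note that as written, an entity still open when the tag list ends is never
# appended; that case only matters if a line can end mid-entity.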
def __init__(self, speclist, xtype='frequency', xarr=None, force=False, **kwargs):
    if xarr is None:
        self.xarr = speclist[0].xarr
    else:
        self.xarr = xarr

    self.units = speclist[0].units
    self.header = speclist[0].header
    self.parse_header(self.header)

    for spec in speclist:
        if not isinstance(spec, Spectrum):
            raise TypeError("Must create an ObsBlock with a list of spectra.")
        if not np.array_equal(spec.xarr, self.xarr):
            if not force:
                raise ValueError("Mismatch between X axes in ObsBlock")
        if spec.units != self.units:
            raise ValueError("Mismatched units")

    if force:
        self.speclist = [interpolation.interp(spec, self) for spec in speclist]
    else:
        self.speclist = speclist
    self.nobs = len(self.speclist)

    # Create a 2-dimensional array of the data
    self.data = np.array([sp.data for sp in self.speclist]).swapaxes(0, 1).squeeze()
    self.error = np.array([sp.error for sp in self.speclist]).swapaxes(0, 1).squeeze()

    self.plotter = plotters.Plotter(self)
    self._register_fitters()
    self.specfit = fitters.Specfit(self, Registry=self.Registry)
    self.baseline = baseline.Baseline(self)
def __init__(self, speclist, xunits='GHz', **kwargs):
    print "Creating spectra"
    speclist = list(speclist)
    for ii, spec in enumerate(speclist):
        if isinstance(spec, str):
            spec = Spectrum(spec)
            speclist[ii] = spec
    self.speclist = speclist

    print "Concatenating data"
    self.xarr = units.SpectroscopicAxes([sp.xarr.as_unit(xunits) for sp in speclist])
    self.xarr.units = xunits
    self.xarr.xtype = units.unit_type_dict[xunits]
    self.data = np.ma.concatenate([sp.data for sp in speclist])
    self.error = np.ma.concatenate([sp.error for sp in speclist])
    self._sort()

    self.header = pyfits.Header()
    for spec in speclist:
        for key, value in spec.header.items():
            try:
                self.header[key] = value
            except (ValueError, KeyError):
                warn("Could not update header KEY=%s to VALUE=%s" % (key, value))

    self.plotter = plotters.Plotter(self)
    self._register_fitters()
    self.specfit = fitters.Specfit(self, Registry=self.Registry)
    self.baseline = baseline.Baseline(self)

    self.units = speclist[0].units
    for spec in speclist:
        if spec.units != self.units:
            raise ValueError("Mismatched units")

    # Special.  This needs to be modified to be more flexible; for now I need it to work for nh3
    self.plot_special = None
    self.plot_special_kwargs = {}
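
# Usage sketch: because the loop above promotes plain strings to Spectrum objects,
# the constructor can be fed filenames directly, e.g. (class name inferred from
# context; this snippet does not show it):
#     spectra = Spectra(['scan1.fits', 'scan2.fits'], xunits='GHz')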
def __init__(self, filename=None, filetype=None, xarr=None, data=None,
             error=None, header=None, doplot=False, maskdata=True,
             plotkwargs={}, xarrkwargs={}, **kwargs):
    """
    Create a Spectrum object.

    Must either pass in a filename or ALL of xarr, data, and header, plus
    optionally error.

    kwargs are passed to the file reader

    Parameters
    ----------
    filename : string
        The file to read the spectrum from.  If data, xarr, and error are
        specified, leave filename blank.
    filetype : string
        Specify the file type (only needed if it cannot be automatically
        determined from the filename)
    xarr : `units.SpectroscopicAxis` or `np.ndarray`
        The X-axis of the data.  If it is an np.ndarray, you must pass
        `xarrkwargs` or a valid header if you want to use any of the unit
        functionality.
    data : `np.ndarray`
        The data array (must have same length as xarr)
    error : `np.ndarray`
        The error array (must have same length as the data and xarr arrays)
    header : `pyfits.Header` or dict
        The header from which to read unit information.  Needs to be a
        `pyfits.Header` instance or another dictionary-like object with the
        appropriate information
    maskdata : boolean
        turn the array into a masked array with all nan and inf values masked
    doplot : boolean
        Plot the spectrum after loading it?
    plotkwargs : dict
        keyword arguments to pass to the plotter
    xarrkwargs : dict
        keyword arguments to pass to the SpectroscopicAxis initialization
        (can be used in place of a header)

    Examples
    --------
    >>> sp = pyspeckit.Spectrum(data=np.random.randn(100),
    ...                         xarr=np.linspace(-50, 50, 100),
    ...                         error=np.ones(100)*0.1,
    ...                         xarrkwargs={'unit': 'km/s', 'refX': 4.829,
    ...                                     'refX_units': 'GHz',
    ...                                     'xtype': 'VLSR-RAD'},
    ...                         header={})

    >>> xarr = pyspeckit.units.SpectroscopicAxis(np.linspace(-50, 50, 100),
    ...                                          units='km/s', refX=6562.83,
    ...                                          refX_units='angstroms')
    >>> data = np.random.randn(100)*5 + np.random.rand(100)*100
    >>> err = np.sqrt(data/5.)*5.  # Poisson noise
    >>> sp = pyspeckit.Spectrum(data=data, error=err, xarr=xarr, header={})

    >>> # if you already have a simple fits file
    >>> sp = pyspeckit.Spectrum('test.fits')
    """
    if filename:
        if filetype is None:
            suffix = filename.rsplit('.', 1)[1]
            if suffix in readers.suffix_types:
                # use the default reader for that suffix
                filetype = readers.suffix_types[suffix][0]
                reader = readers.readers[filetype]
            else:
                raise TypeError("File with suffix %s is not recognized." % suffix)
        else:
            if filetype in readers.readers:
                reader = readers.readers[filetype]
            else:
                raise TypeError("Filetype %s not recognized" % filetype)

        self.data, self.error, self.xarr, self.header = reader(filename, **kwargs)

        # these should probably be replaced with registerable functions...
        if filetype in ('fits', 'tspec', 'pyfits', 'sdss'):
            self.parse_header(self.header)
        elif filetype == 'txt':
            self.parse_text_header(self.header)
        elif filetype in ('hdf5', 'h5'):
            self.parse_hdf5_header(self.header)

        if isinstance(filename, str):
            self.fileprefix = filename.rsplit('.', 1)[0]  # Everything prior to .fits or .txt

    elif xarr is not None and data is not None:
        # technically, this is unpythonic.  But I don't want to search for
        # all 10 attributes required.
        if issubclass(type(xarr), units.SpectroscopicAxis):
            self.xarr = xarr
        else:
            self.xarr = units.SpectroscopicAxis(xarr, **xarrkwargs)

        self.data = data
        if error is not None:
            self.error = error
        else:
            self.error = data * 0

        if hasattr(header, 'get'):
            self.header = header
        else:  # set as blank
            warn("WARNING: Blank header.")
            self.header = pyfits.Header()
        self.parse_header(self.header)

    if maskdata:
        if hasattr(self.data, 'mask'):
            self.data.mask += np.isnan(self.data) + np.isinf(self.data)
            self.error.mask += np.isnan(self.data) + np.isinf(self.data)
        else:
            self.data = np.ma.masked_where(
                np.isnan(self.data) + np.isinf(self.data), self.data)
            self.error = np.ma.masked_where(
                np.isnan(self.data) + np.isinf(self.data), self.error)

    self.plotter = plotters.Plotter(self)
    self._register_fitters()
    self.specfit = fitters.Specfit(self, Registry=self.Registry)
    self.baseline = baseline.Baseline(self)
    self.speclines = speclines
    self._sort()

    # Special.  This needs to be modified to be more flexible; for now I need it to work for nh3
    self.plot_special = None
    self.plot_special_kwargs = {}

    if doplot:
        self.plotter(**plotkwargs)
def slice(self, start=None, stop=None, units='pixel', copy=True, preserve_fits=False):
    """Slicing the spectrum

    .. WARNING:: this is the same as cropping right now, but it returns a
        copy instead of cropping inplace

    Parameters
    ----------
    start : numpy.float or int
        start of slice
    stop : numpy.float or int
        stop of slice
    units : str
        allowed values are any supported physical unit, 'pixel'
    copy : bool
        Return a 'view' of the data or a copy?
    preserve_fits : bool
        Save the fitted parameters from self.fitter?
    """
    if units in ('pixel', 'pixels'):
        start_ind = start
        stop_ind = stop
    else:
        x_in_units = self.xarr.as_unit(units)
        start_ind = x_in_units.x_to_pix(start)
        stop_ind = x_in_units.x_to_pix(stop)
    if start_ind > stop_ind:
        start_ind, stop_ind = stop_ind, start_ind
    spectrum_slice = slice(start_ind, stop_ind)

    if copy:
        sp = self.copy()
    else:
        sp = self
    sp.data = sp.data[spectrum_slice]
    if sp.error is not None:
        sp.error = sp.error[spectrum_slice]
    sp.xarr = sp.xarr[spectrum_slice]

    if copy:
        # create new specfit / baseline instances (otherwise they'll be the wrong length)
        sp._register_fitters()
        sp.baseline = baseline.Baseline(sp)
        sp.specfit = fitters.Specfit(sp, Registry=sp.Registry)
    else:
        # inplace modification
        sp.baseline.crop(start_ind, stop_ind)
        sp.specfit.crop(start_ind, stop_ind)

    if preserve_fits:
        sp.specfit.modelpars = self.specfit.modelpars
        sp.specfit.parinfo = self.specfit.parinfo
        sp.baseline.baselinepars = self.baseline.baselinepars
        sp.baseline.order = self.baseline.order

    return sp
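
# Example use of slice() as defined above (values are illustrative):
#     sub = sp.slice(start=-25, stop=25, units='km/s', preserve_fits=True)
# With copy=True (the default) this returns a cropped copy with fresh specfit and
# baseline objects sized to the new arrays; copy=False instead crops `sp` in place
# through baseline.crop()/specfit.crop().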
the values on a 2.9in Waveshare e-Paper display.
"""
import machine
import esp32
import utime

import ccs811
import bme280
import screen
import battery
import baseline
import config

_i2c = machine.I2C(scl=config.scl, sda=config.sda, freq=100000)
_rtc = machine.RTC()
_bat = battery.Battery(config.battery)
_baseline = baseline.Baseline()


def run():
    """Main entry point to execute this program."""
    try:
        bme = bme280.BME280(i2c=_i2c, mode=bme280.BME280_OSAMPLE_4)
        scr = screen.Screen(config)
        if _delete_ccs811_baseline_requested():
            _baseline.delete()
        if _is_first_run():
            # 20 runs (minutes), p9 of datasheet
            _set_runs_to_condition(20)
        ccs = ccs811.CCS811(_i2c, mode=ccs811.CCS811.DRIVE_MODE_60SEC)
import baseline as b
#import capture_data as d
#import visualize as v
import torch
import numpy as np

#Declare model
model = b.Baseline('cuda:0').to('cuda:0')

#Load in data
data = torch.as_tensor(np.load('../data/batch_0.npz')).float().to('cuda').permute(0, 3, 1, 2)

#checkpoint = torch.load('checkpoint.pth')
#model.do_train_on_vid('training_data/vid1', 100000, batch_size=4, checkpoint=checkpoint)
#model.do_train_on_vid('training_data/vid1', 100000, batch_size=8, checkpoint=checkpoint)
model.do_train(data, data, 1000, batch_size=8)
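
# Note (assumption about the data file): np.load() on a .npz archive returns an
# NpzFile, which torch.as_tensor() cannot convert directly; an array must be pulled
# out by key first. If the archive holds a single unnamed array, numpy stores it
# under the default key 'arr_0':
#     data = torch.as_tensor(np.load('../data/batch_0.npz')['arr_0']).float()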
    words = pickle.load(f)
with open('all_PoS.pickle', 'rb') as f:
    pos = pickle.load(f)

# Clean data.
data = utils.process_data_set(data)
words, data = utils.handle_rare_words(words, data, 2)
words, pos = utils.process_words_pos(words, pos)

# Initialize training and testing set.
training_set_size = int(0.8 * len(data))
training_set = data[:training_set_size]
test_set = data[training_set_size:]

# Initialize and evaluate the baseline model
# (named baseline_model so it does not shadow the imported baseline module).
baseline_model = baseline.Baseline(pos, words, training_set)
total_predictions, correct_predictions = evaluate(baseline_model.MAP, test_set)

# Find the model's accuracy.
accuracy = (correct_predictions / total_predictions) * 100
print('Baseline model accuracy: {}'.format(accuracy))

# Initialize and evaluate the HMM model.
hmm_model = hmm.HMM(pos, words, training_set)
total_predictions, correct_predictions = evaluate(hmm_model.viterbi, test_set)

# Find the model's accuracy.
accuracy = (correct_predictions / total_predictions) * 100
print('HMM model accuracy: {}'.format(accuracy))
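
# A minimal sketch (assumption, not the original helper) of the evaluate() function
# used above, assuming the test set holds (sentence, gold_tags) pairs and the tagger
# callable returns one predicted tag per token:
def evaluate(tagger, test_set):
    total, correct = 0, 0
    for sentence, gold_tags in test_set:
        predicted = tagger(sentence)
        # count per-token agreement between prediction and gold standard
        for pred, gold in zip(predicted, gold_tags):
            total += 1
            if pred == gold:
                correct += 1
    return total, correct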
sensor_funcs = {
    'cur': get_cur,
    'light': get_light,
    'level': get_level,
    'temp': get_temp,
    'humid': get_humid,
    'smoist': get_smoist
}

now = int(time.time())
# Convert to midnight
now += clock_start - time_since_midnight(now)

try:
    baseline_schedule = baseline.Baseline(args.baseline, now)
    now = baseline_schedule.time0
except Exception:
    print('baseline file %s not found or parse error' % args.baseline)
    exit()

rospy.init_node('Simulator', anonymous=True)
generate_publishers()
generate_subscribers()

#print("Now: %f %s" % (now, clock_time(now)))
clock_pub.publish(rospy.Time.from_sec(now))
last_update = now

while not rospy.core.is_shutdown():
    newAsset = oldAsset.copy()
    newAsset.date = newAsset.date + datetime.timedelta(days=1)
    newAsset.todayIn = 0
    allAsset.append(newAsset)


if __name__ == "__main__":
    # Read in the trade records
    tradeList = pd.read_excel('/Users/test/OneDrive/Project/Stock/Datum/40047552对账单/All.xlsx')

    # Build the initial asset list
    allAsset = []
    initDate = datetime.datetime(2014, 8, 6)  # start date: the account's first day, which has a transfer record but no actual funds
    asset = Asset(initDate)
    allAsset.append(asset)

    # Initialize the baseline statistics; we track the baseline's net value and yearly gain
    baseCode = '399006'
    baseline = baseline.Baseline(baseCode, initDate.strftime("%Y%m%d"))

    # Initialize the position statistics; we track net value, position, and yearly gain
    myStatistic = pd.DataFrame(data=np.array([1, 0, 0]).reshape(1, 3),
                               columns=['net', 'position', 'asset'],
                               index=[initDate.strftime("%Y%m%d")])  # rows are dates
    myYearRaise = pd.Series()  # rows are years (the date of each year's last calendar day), values are the within-year gain

    # Start a new day
    print(tradeList.shape[0] - 1)
    for dayNum in range(tradeList.shape[0] - 1):
        newDay(allAsset)
        today = allAsset[-1].date
        todayDateNum = int(today.strftime("%Y%m%d"))  # dates read from the statement are ints, not strings, so convert
        if todayDateNum == 20151231:
            a = 1
        if todayDateNum == 20191206:
            break
        todayTradeOperateRecord = tradeList[tradeList.iloc[:, 0] == todayDateNum]
        # Handle intraday trades