def read_data():
    """Load the task2 feature matrices and training labels from CSV.

    Reads ``X_test.csv``, ``X_train.csv`` and ``y_train.csv`` from the
    ``task2`` folder below ``dt.data_dir()``, taking the first row as header
    and the first column as index, then casts the ``y`` column of the labels
    to ``int``.

    Returns:
        tuple: ``(X_test, X_train, y_train)``.
    """
    logger = logging.getLogger(__name__)
    task_dir = os.path.join(dt.data_dir(), 'task2')
    # The previous message had one placeholder but two format arguments, so
    # the task folder was silently dropped from the log line.
    logger.info('Reading data from [{}]'.format(task_dir))

    X_test = pandas.read_csv(os.path.join(task_dir, 'X_test.csv'),
                             header=0, index_col=0)
    X_train = pandas.read_csv(os.path.join(task_dir, 'X_train.csv'),
                              header=0, index_col=0)
    y_train = pandas.read_csv(os.path.join(task_dir, 'y_train.csv'),
                              header=0, index_col=0)

    logger.info('Modify y_train to be of type <<int.>>')
    y_train.y = y_train.y.astype(int)

    return X_test, X_train, y_train
def read_data():
    """Return the task0 ``(test, train)`` frames read from CSV.

    Both files are read from the ``task0`` folder below ``dt.data_dir()``
    with the first row as header and the first column as index.
    """

    def _load(csv_name):
        # Resolve the file inside the task0 folder and parse it.
        csv_path = os.path.join(dt.data_dir(), 'task0', csv_name)
        return pandas.read_csv(csv_path, header=0, index_col=0)

    test = _load('test.csv')
    train = _load('train.csv')
    return test, train
def main():
    """Cross-validate ridge regression over a grid of penalties.

    Reads ``train.csv`` of the EX1 data set, evaluates each penalty in
    ``lambdaParam`` with 10 folds selected by index value (fold ``k`` holds
    the rows whose index lies in ``[k * N, (k + 1) * N)``), prints the mean
    RMSE per penalty and writes it to ``__sample.csv`` in the same folder.
    """
    ## Read Data
    trainingData = pandas.read_csv(
        os.path.join(cd.data_dir(), cd.DataSets.EX1.value, 'train.csv'),
        header=0, index_col=0)
    yCols = ['y']
    xCols = trainingData.drop(columns=yCols).columns

    ## Set-up cross validation
    lambdaParam = [.1, 1, 10, 100, 1000]
    # Explicit dtype: an empty Series without one triggers a pandas warning
    # and may end up with object dtype.
    rmseVec = pandas.Series(index=[str(i) for i in lambdaParam],
                            name='RMSE', dtype=float)

    N = 50          # rows per fold
    n_folds = 10
    # The fold membership does not depend on the penalty, so build the masks
    # once. The upper bound previously hard-coded 50 instead of using N.
    fold_masks = [
        trainingData.index.isin(numpy.arange(k * N, (k + 1) * N))
        for k in range(n_folds)
    ]

    for penalty in lambdaParam:
        measuredRMSE = []
        for mask in fold_masks:
            # Fit on everything outside the fold ...
            X_t = trainingData.loc[~mask, xCols]
            y_t = trainingData.loc[~mask, yCols]
            # ... and score on the held-out fold.
            X = trainingData.loc[mask, xCols]
            y = trainingData.loc[mask, yCols]

            B = cr.ridge_regression(X=X_t, y=y_t, lambdaParam=penalty)
            betas = pandas.Series(data=B.flatten(), index=xCols)
            yFit = pandas.Series(X.dot(betas).values.flatten(),
                                 index=y.index, name='yFit')
            measuredRMSE.append(
                numpy.sqrt(numpy.mean((y.iloc[:, 0] - yFit)**2)))
        rmseVec[str(penalty)] = numpy.mean(measuredRMSE)

    print(rmseVec)
    rmseVec.to_csv(os.path.join(cd.data_dir(), cd.DataSets.EX1.value,
                                '__sample.csv'),
                   index=False)
def __init__(self, x, y):
    """Create the "Owange" object, load its sprites and reset its state.

    ``x`` and ``y`` are forwarded to ``GameRect.__init__`` together with the
    constants 63 and 74 (presumably width and height -- TODO confirm).
    """
    GameRect.__init__(self, x, y, 63, 74)

    def _orange_art(file_name):
        # Load one image from the orange art folder with per-pixel alpha.
        art_path = os.path.join(data_dir(), 'art', 'orange', file_name)
        return image.load(art_path).convert_alpha()

    self.walk_force = 0
    self.image = [_orange_art('owange.png')]
    self.animation = _orange_art('orange_splat_small.png')

    self.in_air = True
    self.allowed_glide = 2
    self.role = "Owange"
    self.status = None
    self.deadtime = 0

    # owange collides with ground and boughs
    self.layers = LayerType.PLAYER | LayerType.ITEMS
    # NOTE(review): the flag combination above is overwritten right away;
    # confirm which of the two values is intended.
    self.layers = 1
def read_data():
    """Return the task1 ``(X_test, X_train, y_train)`` frames.

    Each CSV lives in the ``task1`` folder below ``dt.data_dir()`` and is
    parsed with the first row as header and the first column as index.
    """
    X_test, X_train, y_train = (
        pandas.read_csv(os.path.join(dt.data_dir(), 'task1', csv_name),
                        header=0, index_col=0)
        for csv_name in ('X_test.csv', 'X_train.csv', 'y_train.csv')
    )
    return X_test, X_train, y_train
def main(window, handle_events):
    """Build and run the intro screen until ``handle_events`` asks to stop.

    Two platforms are linked by three springs and decorated with the title
    text; the loop then updates and draws the world until
    ``handle_events(window, world)`` returns a truthy value.
    """
    world = World()
    world.stage = 1

    p1 = Platform(600, 300, 400, 50)
    world.add_item(p1)
    p2 = Platform(500, 600, 800, 100)
    world.add_item(p2)

    # vert order:
    #   0 3
    #   1 2
    rest_length, stiffness, damping = 200, 10, 1
    # (anchor on p1, anchor on p2, stiffness) for each spring, in the order
    # they are added to the world.
    spring_specs = (
        (lambda vs: vs[1], lambda vs: vs[0], stiffness),
        (lambda vs: vs[2], lambda vs: vs[3], stiffness),
        (lambda vs: (vs[1] + vs[3]) / 2,
         lambda vs: (vs[1] + vs[3]) / 2,
         10 * stiffness),
    )
    for anchor_a, anchor_b, spring_stiffness in spring_specs:
        world.add_spring(
            Spring(p1, p2, anchor_a, anchor_b,
                   rest_length, spring_stiffness, damping))

    white = (255, 255, 255)
    font_path = os.path.join(data_dir(), "fonts", "vinque", "vinque.ttf")

    # Tagline on the lower platform.
    tagline_font = font.Font(font_path, 36)
    tagline = tagline_font.render("The adventures of...", True, white)
    world.add_word(Word(p2, tagline, (200, 50)))

    # Title, one Word per token, on the upper platform.
    title_font = font.Font(font_path, 48)
    text = "Woger the wibbly wobbly wombat"
    rendered = [title_font.render(token, True, white)
                for token in text.split()]
    word_positions = ((200, 75), (500, 75), (175, 250), (350, 250),
                      (550, 250))
    for surface, position in zip(rendered, word_positions):
        world.add_word(Word(p1, surface, position))

    # Instructions on the lower platform.
    hint_font = font.Font(font_path, 24)
    hint = hint_font.render(
        "space to continue, use awwow keys in game to weach owanges",
        True, (0, 0, 0))
    world.add_word(Word(p2, hint, (40, 550)))

    render = Render(window, world)
    while not handle_events(window, world):
        world.update()
        render.draw_world()
        display.flip()
def __init__(self, window, world):
    """Keep the window and world, create a camera and load the 48pt font."""
    self.window, self.world = window, world
    self.camera = Camera(window)
    self.facing_right = 0
    self.font = font.Font(
        os.path.join(data_dir(), "fonts", "vinque", "vinque.ttf"), 48)
def __init__(self, window, world):
    """Store the render targets, attach a camera and load the vinque font.

    Args:
        window: passed to ``Camera`` and kept as ``self.window``.
        world: kept as ``self.world``.
    """
    self.window = window
    self.world = world
    self.camera = Camera(window)
    self.facing_right = 0

    font_parts = ("fonts", "vinque", "vinque.ttf")
    vinque_ttf = os.path.join(data_dir(), *font_parts)
    self.font = font.Font(vinque_ttf, 48)
def read(file):
    """Read a task5 CSV and return its contents as a NumPy array.

    The file name is taken from ``file[0]`` and resolved inside the
    ``task5`` folder below ``dt.data_dir()``; the first column is used as
    index and the first row as header.

    NOTE(review): only the first element of ``file`` is used for the path
    while the whole object is logged -- presumably callers pass a sequence
    whose first item is the file name; confirm.
    """
    start_message = 'reading [{file}]'.format(file=file)
    logger.info(start_message)

    csv_name = '{}'.format(file[0])
    csv_path = os.path.join(dt.data_dir(), 'task5', csv_name)
    frame = pandas.read_csv(csv_path, index_col=0, header=0)

    done_message = 'finished reading [{file}] of shape [{shape}]'.format(
        file=file, shape=frame.shape)
    logger.info(done_message)
    return frame.values
def destroy(self):
    """Mark the object as collided, show the splat sprite, reset forces."""
    self.status = "Collided"
    splat_path = os.path.join(data_dir(), 'art', 'orange',
                              'orange_splat_small.png')
    splat = image.load(splat_path).convert_alpha()
    self.image = [splat]
    self.body.reset_forces()
def __init__(self, **kwargs):
    """Set up the clause segmenter and the start-of-clause skip rules.

    Keyword Args:
        clause_segmenter: segmenter to use. When it is missing or ``None``
            a new ``ClauseSegmenter()`` is created.
    """
    segmenter = kwargs.get('clause_segmenter')
    self._clause_segmenter = (
        ClauseSegmenter() if segmenter is None else segmenter)

    skip_rules_path = data_dir('skip_rules.txt')
    self._clause_discarder = StartOfClauseMatcher.from_file(skip_rules_path)

    self._sent = None
    self._tokens = []
def __init__(self, x, y): GameRect.__init__(self, x, y, 63, 74) #self.color = pygame.Color('orange') self.walk_force = 0 self.image = [ image.load(os.path.join(data_dir(), 'art', 'orange', 'owange.png')).convert_alpha() ] self.animation = image.load( os.path.join(data_dir(), 'art', 'orange', 'orange_splat_small.png') ).convert_alpha( ) #spritesheet.load_strip('orange_splat.png', 1362, colorkey = None)[0] self.in_air = True self.allowed_glide = 2 self.role = "Owange" self.status = None self.deadtime = 0 # owange collides with ground and boughs self.layers = LayerType.PLAYER | LayerType.ITEMS self.layers = 1
def __init__(self, x=63, y=74):
    """Create a "Cherry" object and load its sprite.

    ``x`` and ``y`` are forwarded to ``GameRect.__init__`` together with the
    constants 31 and 75 (presumably width and height -- TODO confirm).
    """
    GameRect.__init__(self, x, y, 31, 75)

    sprite_path = os.path.join(data_dir(), 'art', 'cherry',
                               'cherry_small.png')
    sprite = image.load(sprite_path).convert_alpha()
    self.image = [sprite]

    self.in_air = True
    self.role = "Cherry"
    self.status = None
    self.deadtime = 0

    # cherry collides with ground and boughs
    self.layers = 1
def dump_to_binary(X, name):
    """Pickle ``X`` to ``<name>.p`` in the task5 data folder.

    Args:
        X: object handed to ``pickle.dump``.
        name: base name of the output file (``.p`` is appended).

    Returns:
        The full path of the file that was written.
    """
    logger = logging.getLogger(__name__)

    pickle_name = '{name}.p'.format(name=name)
    full_file_path = os.path.join(dt.data_dir(), 'task5', pickle_name)

    with open(full_file_path, 'w+b') as sink:
        # Log only once the file could actually be opened.
        message = 'writing X to binary file [{file}]'.format(
            file=full_file_path)
        logger.info(message)
        pickle.dump(X, sink)

    return full_file_path
def label_data():
    """Extend the labeled EX4 training set with confidently pseudo-labeled rows.

    Steps, as implemented below:

    1. Read ``train_labeled.h5`` and ``train_unlabeled.h5`` (key ``train``).
    2. Read every ``prediction_vector_*`` CSV in the data folder and stack
       the predictions column-wise.
    3. Take the row-wise mode of the predictions; rows whose mode count is
       at least 10 receive that mode as label ``y``.
    4. Concatenate the labeled data with the newly labeled rows.

    Returns:
        pandas.DataFrame: labeled rows followed by pseudo-labeled rows.

    Raises:
        ValueError: if the data folder holds no ``prediction_vector_*`` file.
    """
    logger = logging.getLogger(__name__)

    logger.info('import datasets')
    data_folder = os.path.join(dt.data_dir(), dt.DataSets.EX4.value)
    train_labeled = pd.read_hdf(os.path.join(data_folder, "train_labeled.h5"),
                                "train")
    train_unlabeled = pd.read_hdf(
        os.path.join(data_folder, "train_unlabeled.h5"), "train")

    logger.info('import p(y|x) model predictions')
    fetch = [
        file for file in os.listdir(data_folder)
        if file.startswith('prediction_vector_')
    ]
    if not fetch:
        # np.hstack would fail with a far less helpful message.
        raise ValueError(
            'no prediction_vector_* files found in [{}]'.format(data_folder))
    # One column block per prediction file.
    container = [
        pd.read_csv(os.path.join(data_folder, file), header=None,
                    index_col=0).values
        for file in fetch
    ]
    predicted_labels = np.hstack(container)

    # NOTE(review): np.hstack below relies on stats.mode returning
    # two-dimensional (n, 1) arrays -- confirm against the installed SciPy.
    predicted_mode, count = stats.mode(predicted_labels, axis=1)
    decision_vector = pd.DataFrame(np.hstack((predicted_mode, count)))
    decision_vector.columns = ['class', 'count']
    # Keep only rows where at least 10 predictions agree on the mode.
    mask = decision_vector['count'] < 10
    idx = mask[~mask].index

    logger.info('label remaining data')
    # The positional row numbers are shifted by 9000 to obtain index labels
    # of the unlabeled frame (presumably its index starts at 9000 -- TODO
    # confirm). Copy first so the assignment does not hit a slice view.
    train_unlabeled_labeled = train_unlabeled.loc[idx + 9000, :].copy()
    train_unlabeled_labeled.loc[:, 'y'] = decision_vector.loc[idx,
                                                              'class'].values

    # === shuffle data prior to fitting
    # NOTE(review): this only relabels the index with a permutation; the row
    # order itself is unchanged. Confirm whether a real shuffle was intended.
    RM = np.random.RandomState(12357)
    train_labeled.index = RM.permutation(train_labeled.index)

    full_train_data = pd.concat([train_labeled, train_unlabeled_labeled],
                                axis=0)
    return full_train_data
def __init__(self, x=63, y=74):
    """Initialise the cherry: base rect, sprite list and state flags.

    Args:
        x: forwarded to ``GameRect.__init__`` (default 63).
        y: forwarded to ``GameRect.__init__`` (default 74).
    """
    GameRect.__init__(self, x, y, 31, 75)

    cherry_parts = ('art', 'cherry', 'cherry_small.png')
    self.image = [
        image.load(os.path.join(data_dir(), *cherry_parts)).convert_alpha()
    ]

    self.in_air = True
    self.role = "Cherry"
    self.status = None
    self.deadtime = 0
    # cherry collides with ground and boughs
    self.layers = 1
def load_mini_h5():
    """Load the ``tree.mini.h5`` test instance.

    Skips the calling test when ``h5py`` is not installed. Reads the
    datasets ``y``, ``parent``, ``xt``, ``lam`` and the attribute ``root``,
    flattens ``y``, converts ``parent`` to ``int32`` and, when the smallest
    parent index is positive, shifts ``parent`` so it starts at zero and
    decrements ``root`` by one.

    Returns:
        tuple: ``(y, parent, xt, lam, root)``.
    """
    h5py = pytest.importorskip("h5py")
    with h5py.File(data_dir('test', 'tree.mini.h5'), 'r') as io:
        # [:] copies the datasets into memory; [()] reads a scalar dataset.
        y = io['y'][:]
        parent = io['parent'][:]
        xt = io['xt'][:]
        lam = io['lam'][()]
        root = int(io.attrs['root'])
    y = y.reshape(-1)
    n = len(y)
    parent = parent.astype(np.int32)
    assert len(parent) == n
    # presumably the file stores 1-based indices; shift to 0-based -- TODO
    # confirm. Note root is always decremented by exactly one.
    if parent.min() > 0:
        parent -= parent.min()
        root -= 1
    # The root is expected to be its own parent.
    assert parent[root] == root
    # Guard against extra references to the arrays being held at this point;
    # do not introduce additional names for `parent` or `y` above.
    assert sys.getrefcount(parent) <= 2
    assert sys.getrefcount(y) <= 2
    return y, parent, xt, lam, root
def play_music(self, musicname, loop=-1):
    """Play a music track, replacing whatever is currently playing.

    Only one track can play at a time: a running track is stopped first.
    The track is looked up in the ``music`` folder below ``data_dir()``;
    ``<musicname>.ogg`` is preferred when that file exists.

    Args:
        musicname: track name, with or without extension.
        loop: passed to ``music.play`` (default ``-1``).
    """
    music = mixer.music
    if not music:
        return

    if music.get_busy():
        # we really should fade out nicely and wait for the end music
        # event, for now, CUT
        music.stop()

    base_path = os.path.join(data_dir(), 'music', musicname)
    ogg_path = base_path + ".ogg"
    track_path = ogg_path if os.path.exists(ogg_path) else base_path

    music.load(track_path)
    music.play(loop)
    music.set_volume(1.0)
def load_image(filename, colorkey=None):
    """Load an image once and serve it from ``IMAGE_CACHE`` afterwards.

    The cache is keyed by ``filename`` only, so the ``colorkey`` of the
    first call for a given file wins.

    Args:
        filename: an existing path, or a name relative to the ``art``
            folder below ``data_dir()``.
        colorkey: if -1, use the top left pixel as the color key; if None,
            use per pixel alpha; any other value is used as the color key.

    Returns:
        The converted pygame surface.
    """
    if filename in IMAGE_CACHE:
        return IMAGE_CACHE[filename]

    if os.path.exists(filename):
        fname = filename
    else:
        fname = os.path.join(data_dir(), "art", filename)
    img = pygame.image.load(fname)

    if colorkey is not None:
        # color key images
        # Compare by value: `is -1` tests object identity and only worked
        # through an interpreter implementation detail.
        if colorkey == -1:
            colorkey = img.get_at((0, 0))
        img = img.convert()
        img.set_colorkey(colorkey, RLEACCEL)
    else:
        # per pixel alpha images.
        img = img.convert_alpha()

    IMAGE_CACHE[filename] = img
    return img
def _random_spawn_point(window):
    """Return a random integer (x, y) spawn point in the bottom 300px band."""
    bounds = window.width
    # Floor division keeps the arguments integral; random.randint rejects
    # the float that `/` produces under Python 3.
    x = random.randint(-bounds // 2, bounds // 2)
    y = random.randint(window.height - 300, window.height)
    return x, y


def handle_events(window, world):
    """Drain the event queue and apply every event to the game.

    Args:
        window: game window; used for fullscreen toggling and spawn bounds.
        world: game world; ``world.stage`` 1 is the intro and 3 the outro.

    Returns:
        ``True`` when the window was closed, ESC was pressed, or any key was
        pressed during the intro; ``2`` when a non-ESC key was pressed on
        the outro; ``False`` otherwise.
    """
    should_quit = False
    for e in event.get():
        if e.type == QUIT:
            should_quit = True
            break
        elif e.type == KEYDOWN:
            # Shift+S saves a screenshot.
            if e.key == K_s and e.mod & KMOD_SHIFT:
                pygame.image.save(pygame.display.get_surface(), "screeny.png")
            if e.key == K_ESCAPE:
                should_quit = True
                break
            elif e.key == K_RETURN and e.mod & KMOD_ALT:
                window.toggle_fullscreen()

            if world.stage == 1:
                # any key quits the intro
                should_quit = True
                return should_quit
            elif world.stage == 3:
                # anykey replays game, esc quits. ESC already left the loop
                # above, so should_quit is False when this point is reached.
                if should_quit:
                    return should_quit
                else:
                    # non-esc keypressed
                    return 2

            # Woger
            woger = world.player_character
            if e.key == K_LEFT:
                woger.do_walk(-1)
            elif e.key == K_RIGHT:
                woger.do_walk(1)
            elif e.key == K_SPACE or e.key == K_UP:
                woger.jump()
            elif e.key == K_DOWN:
                woger.dive()
        elif world.stage in (1, 3):
            # no key down, but stop here, we're in intro/outro
            return should_quit
        elif e.type == KEYUP:
            woger = world.player_character
            if e.key in (K_LEFT, K_RIGHT):
                woger.end_walk()
        elif e.type == CLEANUP:
            world.remove_collided()
        elif e.type == TICK_TOCK:
            world.tick()
        elif e.type == ADDCHERRY:
            world.add_cherry(*_random_spawn_point(window))
        elif e.type == ADDOWANGE:
            world.add_owange(*_random_spawn_point(window))
        elif e.type == BIRDY:
            bird_files = glob.glob(
                os.path.join(data_dir(), 'sounds', 'birds*.ogg'))
            # Sound names are the file names without the ".ogg" suffix.
            bsounds = [os.path.basename(b[:-4]) for b in bird_files]
            # random.choice raises on an empty list; stay silent instead.
            if bsounds:
                Sounds.sounds.play(random.choice(bsounds))
    return should_quit
""" import os import matplotlib.pyplot as plt import numpy as np import pandas as pd import data as cd from sklearn import datasets, metrics from sklearn.metrics import mean_squared_error, r2_score from sklearn.linear_model import LinearRegression from sklearn.linear_model import Perceptron from sklearn.metrics import classification_report, confusion_matrix from sklearn.model_selection import GridSearchCV # Load the datasets trainSet = pd.read_hdf(os.path.join(cd.data_dir(), cd.DataSets.EX3.value, 'train.h5'), key='train') testSet = pd.read_hdf(os.path.join(cd.data_dir(), cd.DataSets.EX3.value, 'test.h5'), key='test') sampleSet = pd.read_csv(os.path.join(cd.data_dir(), cd.DataSets.EX3.value, 'sample.csv'), header=0, index_col=0, float_precision='round_trip') # Now apply the transformations to the data: from sklearn.preprocessing import StandardScaler scaler = StandardScaler() # Fit only to the training data
def main():
    """Explore the relation between bus-69 delays and weather.

    Loads delay and weather data via ``dt``, derives the delay per stop
    (``ist_an_von - soll_an_von``), merges it with the weather data on the
    hour-rounded timestamp, and produces a series of diagnostic plots
    (delay by hour of day, FFT spectrum, day/week differenced series,
    delay-vs-rain scatter plots and a combined time series). Most figures
    are saved as PNG files in the working directory and the merged table
    is written to ``weather_delays_merged.csv`` below ``dt.data_dir()``.
    """
    # === Read DELAYS and WEATHER data
    delays = dt.get_lineie_69_data()
    weather = dt.get_weather_data()

    # === Focus on BUS 69
    mask = delays.linie == 69
    # reset_index returns a fresh frame, so the column assignments below do
    # not operate on a filtered slice of the original data.
    delays = delays[mask].reset_index(drop=True)

    # === Extract exact time delays
    delays.loc[:, 'diff'] = delays.ist_an_von - delays.soll_an_von
    delays.loc[:, 'time'] = pandas.to_datetime(
        delays.soll_an_von.copy().astype(float), errors='coerce', unit='s')
    delays.time = delays.time.dt.strftime('%H:%M')
    delays.loc[:, 'datetime'] = pandas.to_datetime(
        delays.datum_von.astype(str) + ' ' + delays.time)
    delays.datetime = delays.datetime.dt.round('60min')

    # === Show delay pattern as a function of time of day
    temp = delays.copy()
    temp.loc[:, 'hour_of_day'] = pandas.to_datetime(temp.time).dt.hour
    temp = temp.groupby('hour_of_day').mean()
    fig, ax = plt.subplots(1)
    ax.plot(temp.index, temp['diff'])
    ax.set_ylabel('Average delay [s]')
    ax.set_xlabel('Time of Day [HH:MM]')
    for tick in ax.get_xticklabels():
        tick.set_rotation(90)
    plt.savefig('delay_vs_time-of-day.png')

    # === Merge with WEATHER data
    weatherDelays = weather.merge(delays, right_on='datetime',
                                  left_index=True, how='inner')
    weatherDelays.to_csv(os.path.join(dt.data_dir(),
                                      'weather_delays_merged.csv'))

    # ==== Remove NaN where there is no public transport data
    mask = weatherDelays.datetime > datetime.datetime(2018, 2, 4)
    weatherDelays = weatherDelays[mask]
    del mask
    # Despite the name this is the per-timestamp mean. An identical,
    # never-used `averageWeatherDelays` frame was removed.
    cumulativeWeatherDelays = weatherDelays.groupby('datetime').mean()

    # === Estimate DAILY SEASONALITY using Fourier transform
    # Description: Fourier transform of time-series data in time domain
    # (yt, xt) to frequency domain (yf, xf).
    #   n: number of data points / observations
    #   T: maximum frequency of data i.e. 1H, 1m, 1s
    n, m = cumulativeWeatherDelays.shape
    T = 1/n
    # NOTE(review): the transform runs on the un-aggregated frame while `n`
    # counts aggregated rows -- confirm which series was intended.
    yf = scipy.fftpack.fft(weatherDelays['diff'].values)
    # numpy.linspace needs an integer sample count; `n/2` is a float.
    xf = numpy.linspace(0, 1/2.0 * T, n // 2)
    fig, ax = plt.subplots()
    ax.plot(xf, 2.0/n * numpy.abs(yf[:n//2]))
    # TODO Build function that 1. Picks data window, 2. FFT, 3. Removes
    # frequency in xf domain, 4. IFFT, 5. Corr

    # === Try to remove DAILY SEASONALITY by subtracting previous day's value
    timeDelta = datetime.timedelta(days=1)
    temp = (cumulativeWeatherDelays.copy()
            - cumulativeWeatherDelays.shift(freq=timeDelta))
    dailySeasoned = temp.dropna(how='all', axis=0)
    dailySeasoned = dailySeasoned.interpolate()
    del timeDelta, temp
    plt.figure()
    dailySeasoned['diff'].plot()

    # === Try to remove WEEKLY SEASONALITY by subtracting previous week's value
    timeDelta = datetime.timedelta(days=7)
    temp = (cumulativeWeatherDelays.copy()
            - cumulativeWeatherDelays.shift(freq=timeDelta))
    weeklySeasoned = temp.dropna(how='all', axis=0)
    weeklySeasoned = weeklySeasoned.interpolate()
    del timeDelta, temp
    plt.figure()
    weeklySeasoned['diff'].plot()

    # === Plot data with and without seasoning treatment
    fig, axes = plt.subplots(2, sharex=True, figsize=(15, 10))
    matplotlib.rc('xtick', labelsize=24)
    matplotlib.rc('ytick', labelsize=24)
    axis = 0
    axes[axis].plot(
        weeklySeasoned.index,
        cumulativeWeatherDelays.reindex(weeklySeasoned.index)['diff'])
    axes[axis].set_xlabel('Without de-seasoning')
    axes[axis].set_ylabel('Delay [s]')
    axis += 1
    axes[axis].plot(weeklySeasoned.index, weeklySeasoned['diff'])
    axes[axis].set_xlabel('With de-seasoning')
    axes[axis].set_ylabel('Delay [s]')
    fig.savefig('seasoned_vs_de-seasoned_delay_data.png')

    # === Plot data without de-seasoning and rainfall data
    fig, axes = plt.subplots(2, sharex=True, figsize=(15, 10))
    axis = 0
    axes[axis].plot(cumulativeWeatherDelays.index,
                    cumulativeWeatherDelays['diff'])
    axes[axis].set_xlabel('Without de-seasoning')
    axes[axis].set_ylabel('Delay [s]')
    axis += 1
    axes[axis].plot(weeklySeasoned.index, weeklySeasoned['niederschlag_mm'])
    axes[axis].set_xlabel('Rain data')
    axes[axis].set_ylabel('Rainfall [mm]')
    fig.savefig('seasoned_vs_rainfall_data.png')

    # === Plot delay-vs-weather graphs for de-seasoned data
    # Description: Scatter plot between CUMULATIVE MM RAIN and DELAYS
    rainAligned = cumulativeWeatherDelays.reindex(
        index=weeklySeasoned.index)['niederschlag_mm']
    mask = rainAligned > 0
    xData = rainAligned[mask]
    yData = weeklySeasoned['diff'].loc[xData.index]
    corrMat = numpy.corrcoef(xData, yData)
    corrCoefPatch = mpatches.Patch(
        color='blue',
        label='Correlation coefficient := %.2f' % corrMat[0][1])
    plt.figure()
    plt.scatter(x=xData, y=yData, marker='x')
    plt.xlabel('Precipitation (mm)')
    plt.ylabel('De-seasoned delay (s)')
    plt.legend(handles=[corrCoefPatch])
    plt.tight_layout()
    plt.savefig('corr_rain_vs_delay_-_with_de-seasoning.png')
    del xData, yData

    mask = cumulativeWeatherDelays['niederschlag_mm'] > 0
    xData = cumulativeWeatherDelays['niederschlag_mm'][mask]
    yData = cumulativeWeatherDelays['diff'].loc[xData.index]
    corrMat = numpy.corrcoef(xData, yData)
    corrCoefPatch = mpatches.Patch(
        color='blue',
        label='Correlation coefficient := %.2f' % corrMat[0][1])
    plt.figure()
    plt.scatter(x=xData, y=yData, marker='x')
    plt.xlabel('Precipitation [mm]')
    plt.ylabel('Delay [s]')
    plt.legend(handles=[corrCoefPatch])
    plt.tight_layout()
    plt.savefig('corr_rain_vs_delay_-_no_de-seasoning.png')
    del xData, yData

    # Description: Time-series plot between CUMULATIVE RAIN and DELAY
    xData = cumulativeWeatherDelays['niederschlag_mm']
    print(xData)
    yData = cumulativeWeatherDelays['diff']
    fig, ax = plt.subplots(2, sharex=True, figsize=(15, 10))
    axis = 0
    ax[axis].plot(yData.index, yData)
    ax[axis].set_ylabel('Delay [s]')
    axis += 1
    ax[axis].bar(xData.index, height=xData, width=0.05, color='green')
    ax[axis].set_xlabel('YYYY-MM-DD:HH')
def prepare_data(small_sample=None):
    """Read the TASK3 data and derive its frequency-domain representation.

    Args:
        small_sample: when truthy, only this many rows are read from each
            CSV; otherwise (``None`` or ``0``) the full files are read.

    Returns:
        tuple: ``(x_train, x_test, y_train, freq_x_test, freq_x_train)``.
        Note that the test frequencies come before the train frequencies.
    """
    logger = logging.getLogger(__name__)

    # Both modes read the same files; they differ only in the row limit.
    if bool(small_sample):
        logger.debug('Read small sample data')
        nrows = small_sample
    else:
        logger.debug('Read data')
        nrows = None  # pandas default: read every row

    data_dir = os.path.join(dt.data_dir(), dt.Tasks.TASK3.value)

    def _read(file_name):
        # Parse one CSV of the task folder with the shared options.
        return pandas.read_csv(os.path.join(data_dir, file_name),
                               header=0, index_col=0, nrows=nrows)

    x_train = _read('X_train.csv')
    x_test = _read('X_test.csv')
    y_train = _read('y_train.csv')

    logger.debug('Fourier transform data')
    # Train and test must be transformed with identical settings.
    spectrum_options = dict(sample_frequency=Frequency.HERTZ.value,
                            low_cut=0.67,
                            high_cut=25,
                            normalize=True,
                            scale=True)
    freq_x_train_matrix, pow_x_train = fourier.prepare_frequency_data_set(
        X=x_train, **spectrum_options)
    freq_x_test_matrix, pow_x_test = fourier.prepare_frequency_data_set(
        X=x_test, **spectrum_options)
    # Sanity check that both transforms report the same second return value.
    assert pow_x_test == pow_x_train

    freq_x_train = pandas.DataFrame(freq_x_train_matrix)
    freq_x_test = pandas.DataFrame(freq_x_test_matrix)

    return x_train, x_test, y_train, freq_x_test, freq_x_train
def main():
    """Grid-search an SVM on the EX2 data and write test predictions.

    Standardises the 16 feature columns with statistics of the training
    set, tunes ``C`` and the kernel with ``GridSearchCV``, prints the
    in-sample accuracy, writes the test predictions to
    ``svm_classifier_python.csv`` and prints the cross-validation table.
    """
    # Load the datasets
    trainSet = pd.read_csv(os.path.join(cd.data_dir(), cd.DataSets.EX2.value,
                                        'train.csv'),
                           header=0, index_col=0,
                           float_precision='round_trip')
    testSet = pd.read_csv(os.path.join(cd.data_dir(), cd.DataSets.EX2.value,
                                       'test.csv'),
                          header=0, index_col=0,
                          float_precision='round_trip')

    xColumns = ['x' + str(i + 1) for i in range(16)]
    yColumns = ['y']

    # Now apply the transformations to the data:
    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler()
    # Fit only to the training data, then apply to both sets.
    scaler.fit(trainSet.loc[:, xColumns])
    scaledTrainSet = scaler.transform(trainSet.loc[:, xColumns])
    scaledTestSet = scaler.transform(testSet.loc[:, xColumns])

    # `.values` replaces DataFrame.as_matrix(), which newer pandas releases
    # no longer provide.
    yTrain = trainSet.loc[:, yColumns].values.flatten()

    # set up SVM classifier
    svm = SVC()
    parameters = {'C': np.arange(1.85, 1.89, 0.001),
                  'kernel': ['rbf', 'poly']}
    optSVM = GridSearchCV(svm, parameters)
    # A single fit suffices; the search used to be run twice on the same
    # data.
    optSVM.fit(scaledTrainSet, yTrain)

    # prediction in-sample
    yOpt_in_sample = optSVM.predict(scaledTrainSet)

    # classification statistics
    dim = len(yOpt_in_sample)
    inSampleConfusionMatrix = confusion_matrix(trainSet.loc[:, yColumns],
                                               yOpt_in_sample)
    accuracy = np.sum(np.diag(inSampleConfusionMatrix)) / dim
    print("Using optimal parameter alpha, model accuracy %.4f " % accuracy)

    # prediction
    y_pred = optSVM.predict(scaledTestSet)

    # write to pandas DataFrame object
    yPred = pd.DataFrame(y_pred, index=testSet.index, columns=['y'])
    yPred.to_csv(
        os.path.join(cd.data_dir(), cd.DataSets.EX2.value,
                     'svm_classifier_python.csv'))

    # print out results of GridSearchCV
    print(pd.DataFrame.from_dict(optSVM.cv_results_))
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import data as cd
from sklearn import datasets, metrics
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Perceptron
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.model_selection import GridSearchCV

# Load the datasets (EX2): first row is the header, first column the index.
trainSet = pd.read_csv(os.path.join(cd.data_dir(), cd.DataSets.EX2.value, 'train.csv'), header=0, index_col=0, float_precision='round_trip')
testSet = pd.read_csv(os.path.join(cd.data_dir(), cd.DataSets.EX2.value, 'test.csv'), header=0, index_col=0, float_precision='round_trip')
sampleSet = pd.read_csv(os.path.join(cd.data_dir(), cd.DataSets.EX2.value, 'sample.csv'), header=0, index_col=0, float_precision='round_trip')

# Now apply the transformations to the data:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

# Perceptron without an intercept term.
prcptrn = Perceptron(fit_intercept=False)

# Feature columns are named x1 .. x16; the target column is y.
xColumns = ['x' + str(i+1) for i in range(16)]
yColumns = ['y']

# Fit only to the training data
scaler.fit(trainSet.loc[:, xColumns])
scaledTrainSet = scaler.transform(trainSet.loc[:, xColumns])
# Send log records of DEBUG level and above to stdout with a timestamped
# format.
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)

import fourier
import arima
import bios

#nrows = 10
# Number of rows to read from each file; presumably the sum of the four
# class sizes -- TODO confirm.
nrows = 3030 + 443 + 1474 + 170
# Set to True to recompute the templates below.
regenerate = False

# task3 data: first row is the header, first column the index.
y = pd.read_csv(os.path.join(dt.data_dir(), 'task3', 'y_train.csv'), header=0, index_col=0, nrows=nrows)
_X = pd.read_csv(os.path.join(dt.data_dir(), 'task3', 'X_train.csv'), header=0, index_col=0, nrows=nrows)
_X_test = pd.read_csv(os.path.join(dt.data_dir(), 'task3', 'X_test.csv'), header=0, index_col=0, nrows=nrows)

if regenerate:
    X_templates = bios.templates(_X)
    X_test_templates = bios.templates(_X_test)
def main():
    """Sketch of a two-hidden-layer TensorFlow classifier for the EX3 data.

    Loads ``train.h5``/``test.h5``, declares placeholders, weights and
    biases, builds a softmax cross-entropy loss with an Adam optimizer and
    runs a training loop.

    NOTE(review): the function looks unfinished -- it references
    ``neural_net`` and ``mnist``, which are not defined inside this
    function (presumably module-level or left over from a tutorial; confirm
    before relying on it).
    """
    # === Data
    train = pandas.read_hdf(os.path.join(cd.data_dir(), cd.DataSets.EX3.value, 'train.h5'), key='train')
    test = pandas.read_hdf(os.path.join(cd.data_dir(), cd.DataSets.EX3.value, 'test.h5'), key='test')
    # NOTE(review): train_tensor is not used below; tf.split receives the
    # DataFrame itself rather than this tensor.
    train_tensor = tf.convert_to_tensor(train.as_matrix())
    batches = tf.split(train, num_or_size_splits=36, axis=0)
    print(batches)
    # Feature columns are named x1 .. x100; the target column is y.
    features = ["x%i" % i for i in numpy.arange(1, 101, 1)]
    target = ['y']
    n, F = train[features].shape
    # NOTE(review): `unique` is referenced but not called here, so `len(C)`
    # further down is applied to an attribute, not to the class values.
    C = train[target].unique
    # ==== Parameters
    learning_rate = 0.1
    num_steps = 500
    batch_size = 128
    display_step = 100
    # === Network Parameters
    n_hidden_1 = 256  # 1st layer number of neurons
    n_hidden_2 = 256  # 2nd layer number of neurons
    num_input = F
    num_classes = len(C)  # MNIST total classes (0-9 digits)
    # === tf Graph input - reserve space / declare
    X = tf.placeholder("float", [None, num_input])
    Y = tf.placeholder("float", [None, num_classes])
    # === Store layers weight & bias
    # NOTE(review): weights and biases are local and are not passed to
    # neural_net(X) below -- verify how the model is meant to access them.
    weights = {
        'h1': tf.Variable(tf.random_normal([num_input, n_hidden_1])),
        'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
        'out': tf.Variable(tf.random_normal([n_hidden_2, num_classes]))
    }
    biases = {
        'b1': tf.Variable(tf.random_normal([n_hidden_1])),
        'b2': tf.Variable(tf.random_normal([n_hidden_2])),
        'out': tf.Variable(tf.random_normal([num_classes]))
    }
    # === Construct model
    logits = neural_net(X)
    prediction = tf.nn.softmax(logits)
    # Define loss and optimizer
    loss_op = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op)
    # Evaluate model
    correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    # Initialize the variables (i.e. assign their default value)
    init = tf.global_variables_initializer()
    # Start training
    with tf.Session() as sess:
        # Run the initializer
        sess.run(init)
        for step in range(1, num_steps + 1):
            # NOTE(review): `batch` is never used; every iteration pulls its
            # data from `mnist` instead of from the EX3 batches.
            for batch in batches:
                batch_x, batch_y = mnist.train.next_batch(batch_size)
                # Run optimization op (backprop)
                sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
            if step % display_step == 0 or step == 1:
                # Calculate batch loss and accuracy
                loss, acc = sess.run([loss_op, accuracy], feed_dict={
                    X: batch_x,
                    Y: batch_y
                })
                print("Step " + str(step) + ", Minibatch Loss= " + \
                      "{:.4f}".format(loss) + ", Training Accuracy= " + \
                      "{:.3f}".format(acc))
        print("Optimization Finished!")
        # Calculate accuracy for MNIST test images
        print("Testing Accuracy:", \
              sess.run(accuracy, feed_dict={X: mnist.test.images, Y: mnist.test.labels}))
    return {}
import sys
import logging

# Module logger at INFO level, echoing to stdout with a timestamped format.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)

#nrows = 10
# Number of rows to read from each file; presumably the sum of the four
# class sizes -- TODO confirm.
nrows = 3030 + 443 + 1474 + 170

# task3 data: first row is the header, first column the index.
y = pd.read_csv(os.path.join(dt.data_dir(), 'task3', 'y_train.csv'), header=0, index_col=0, nrows=nrows)
_X = pd.read_csv(os.path.join(dt.data_dir(), 'task3', 'X_train.csv'), header=0, index_col=0, nrows=nrows)
_X_test = pd.read_csv(os.path.join(dt.data_dir(), 'task3', 'X_test.csv'), header=0, index_col=0, nrows=nrows)

# Always-on switch kept as an `if` so the template step can be disabled.
if True:
    X_templates = templates(_X)
    X_test_templates = templates(_X_test)
## print ("total_width is:%s:" % total_width) big_surf = pygame.Surface((total_width, surfs[0].get_height()), SRCALPHA, 32) x = 0 for s in surfs: big_surf.blit(s, (x, 0)) x += surfs[0].get_width() pygame.image.save(big_surf, out_fname) if __name__ == "__main__": if 1: combine_images_into_sprite_sheet(os.path.join(data_dir(), 'art', 'leaves'), os.path.join(data_dir(), 'art', 'leaves-rotating-88.png')) else: pygame.init() screen = pygame.display.set_mode((640,480)) # here we just load the strip into a whole bunch of sub surfaces. # sub surfaces reference the big image, but act just like normal surfaces. sub_surfaces = load_strip('leaf-movement-88.png', 88, colorkey = None) ## print len(sub_surfaces) assert(len(sub_surfaces) == 2)
@author: Anastasiya """ import os import matplotlib.pyplot as plt import numpy as np import pandas as pd import data as cd from sklearn import datasets, metrics from sklearn.metrics import mean_squared_error, r2_score from sklearn.linear_model import LinearRegression # Load the datasets trainSet = pd.read_csv(os.path.join(cd.data_dir(), cd.DataSets.EX0.value, 'train.csv'), header=0, index_col=0, float_precision='round_trip') testSet = pd.read_csv(os.path.join(cd.data_dir(), cd.DataSets.EX0.value, 'test.csv'), header=0, index_col=0, float_precision='round_trip') sampleSet = pd.read_csv(os.path.join(cd.data_dir(), cd.DataSets.EX0.value, 'sample.csv'), header=0, index_col=0, float_precision='round_trip')
def main(window, handle_events):
    """Show the title screen and loop until ``handle_events`` signals exit.

    Args:
        window: window handed to ``Render`` and to ``handle_events``.
        handle_events: callable ``(window, world)``; a truthy return value
            ends the loop.
    """
    world = World()
    world.stage = 1

    p1 = Platform(600, 300, 400, 50)
    world.add_item(p1)
    p2 = Platform(500, 600, 800, 100)
    world.add_item(p2)

    # vert order:
    #   0 3
    #   1 2
    rest_length, stiffness, damping = 200, 10, 1

    def connect(anchor_on_p1, anchor_on_p2, spring_stiffness):
        # Link the two platforms with one spring and register it.
        world.add_spring(
            Spring(p1, p2, anchor_on_p1, anchor_on_p2, rest_length,
                   spring_stiffness, damping))

    connect(lambda vs: vs[1], lambda vs: vs[0], stiffness)
    connect(lambda vs: vs[2], lambda vs: vs[3], stiffness)
    connect(lambda vs: (vs[1] + vs[3]) / 2,
            lambda vs: (vs[1] + vs[3]) / 2,
            10 * stiffness)

    font_path = os.path.join(data_dir(), "fonts", "vinque", "vinque.ttf")

    def place(platform, surface, position):
        # Attach a rendered surface to a platform and register it.
        world.add_word(Word(platform, surface, position))

    fnt = font.Font(font_path, 36)
    place(p2, fnt.render('The adventures of...', True, (255, 255, 255)),
          (200, 50))

    fnt = font.Font(font_path, 48)
    text = 'Woger the wibbly wobbly wombat'
    title_surfaces = [
        fnt.render(token, True, (255, 255, 255)) for token in text.split()
    ]
    word_positions = (
        (200, 75),
        (500, 75),
        (175, 250),
        (350, 250),
        (550, 250),
    )
    for surface, position in zip(title_surfaces, word_positions):
        place(p1, surface, position)

    fnt = font.Font(font_path, 24)
    place(p2,
          fnt.render(
              'space to continue, use awwow keys in game to weach owanges',
              True, (0, 0, 0)),
          (40, 550))

    render = Render(window, world)
    while True:
        if handle_events(window, world):
            break
        world.update()
        render.draw_world()
        display.flip()
def _random_spawn_point(window):
    """Return a random integer (x, y) spawn point in the bottom 300px band."""
    bounds = window.width
    # Floor division keeps the arguments integral; random.randint rejects
    # the float that `/` produces under Python 3.
    x = random.randint(-bounds // 2, bounds // 2)
    y = random.randint(window.height - 300, window.height)
    return x, y


def handle_events(window, world):
    """Drain the event queue and apply every event to the game.

    Args:
        window: game window; used for fullscreen toggling and spawn bounds.
        world: game world; ``world.stage`` 1 is the intro and 3 the outro.

    Returns:
        ``True`` when the window was closed, ESC was pressed, or any key was
        pressed during the intro; ``2`` when a non-ESC key was pressed on
        the outro; ``False`` otherwise.
    """
    should_quit = False
    for e in event.get():
        if e.type == QUIT:
            should_quit = True
            break
        elif e.type == KEYDOWN:
            # Shift+S saves a screenshot.
            if e.key == K_s and e.mod & KMOD_SHIFT:
                pygame.image.save(pygame.display.get_surface(), "screeny.png")
            if e.key == K_ESCAPE:
                should_quit = True
                break
            elif e.key == K_RETURN and e.mod & KMOD_ALT:
                window.toggle_fullscreen()

            if world.stage == 1:
                # any key quits the intro
                should_quit = True
                return should_quit
            elif world.stage == 3:
                # anykey replays game, esc quits. ESC already left the loop
                # above, so should_quit is False when this point is reached.
                if should_quit:
                    return should_quit
                else:
                    # non-esc keypressed
                    return 2

            # Woger
            woger = world.player_character
            if e.key == K_LEFT:
                woger.do_walk(-1)
            elif e.key == K_RIGHT:
                woger.do_walk(1)
            elif e.key == K_SPACE or e.key == K_UP:
                woger.jump()
            elif e.key == K_DOWN:
                woger.dive()
        elif world.stage in (1, 3):
            # no key down, but stop here, we're in intro/outro
            return should_quit
        elif e.type == KEYUP:
            woger = world.player_character
            if e.key in (K_LEFT, K_RIGHT):
                woger.end_walk()
        elif e.type == CLEANUP:
            world.remove_collided()
        elif e.type == TICK_TOCK:
            world.tick()
        elif e.type == ADDCHERRY:
            world.add_cherry(*_random_spawn_point(window))
        elif e.type == ADDOWANGE:
            world.add_owange(*_random_spawn_point(window))
        elif e.type == BIRDY:
            bird_files = glob.glob(
                os.path.join(data_dir(), 'sounds', 'birds*.ogg'))
            # Sound names are the file names without the ".ogg" suffix.
            bsounds = [os.path.basename(b[:-4]) for b in bird_files]
            # random.choice raises on an empty list; stay silent instead.
            if bsounds:
                Sounds.sounds.play(random.choice(bsounds))
    return should_quit
def main():
    """Train one model per subject, predict on the test set, export blends.

    Steps, in order:
      1. Read the four training CSVs from ``<data_dir>/task5``.
      2. Call ``train`` on three consecutive row ranges (subjects one to
         three).  The third range ends at ``3 * N - 500``.
      3. Read the three test CSVs, split them at row ``N`` into halves A and
         B, fill NaNs with 0, row-normalise with ``df_row_norm`` and stack
         the three signals with ``np.dstack``.
      4. Call ``predict`` once per subject model, each with its own weights.
      5. Write two blended submissions to ``<output_dir>``: an equal 0.33
         mix of all three scores, and a 0.5/0/0.5 mix multiplied by
         ``[1.5, 0.8, 1.6]``.
    """
    N = 21600
    validate_size = 2000
    epochs = 50
    model_type = ModelType.CNN_LSTM

    def load(file_name):
        # Every task5 CSV is read with the same header/index settings.
        return pd.read_csv(os.path.join(dt.data_dir(), 'task5', file_name),
                           header=0, index_col=0)

    def export(scores, label):
        # Pick the arg-max column per row, print the class ratios next to
        # the hard-coded expected ones, then write 1-based ids to CSV.
        y_test = np.argmax(scores, axis=1)
        result = pd.Series(y_test)
        expected = [0.526, 0.418, 0.0548]
        for i in range(3):
            print("class expected/realized class ratio [%s]: [%s/%s]"
                  % (i, expected[i], sum(result == i) / len(result)))
        print("")
        result += 1
        result.index.name = 'Id'
        result.name = 'y'
        pd.DataFrame(result).to_csv(
            os.path.join(dt.output_dir(), "%s.csv" % label))

    ###################################
    ### Read train data and fit models
    ###################################
    eeg1 = load('train_eeg1.csv')
    eeg2 = load('train_eeg2.csv')
    emg = load('train_emg.csv')
    labels = load('train_labels.csv')

    # (start, end) row range of each subject in the training files
    subject_rows = (
        (0, N),
        (N, N * 2),
        (N * 2, N * 3 - 500),
    )
    models = []
    for subject, (start, end) in enumerate(subject_rows, start=1):
        label = 'subject%s_%s_%s_epochs' % (subject, model_type, epochs)
        models.append(train(eeg1=eeg1.iloc[start:end, :],
                            eeg2=eeg2.iloc[start:end, :],
                            emg=emg.iloc[start:end, :],
                            labels=labels.iloc[start:end, :],
                            type=model_type,
                            validate_size=validate_size,
                            epochs=epochs,
                            label=label))

    ##############################################
    ### Models fitted, read test data and predict
    ##############################################
    test_frames = (load('test_eeg1.csv'),
                   load('test_eeg2.csv'),
                   load('test_emg.csv'))

    def stack_rows(rows):
        normalised = [df_row_norm(frame.iloc[rows, :].fillna(0)).values
                      for frame in test_frames]
        return np.dstack(tuple(normalised))

    X_test_A = stack_rows(slice(None, N))
    X_test_B = stack_rows(slice(N, None))

    # weights handed to predict() for subject models one, two and three
    per_subject_weights = ([1, 0.5, 2.5], [1, 0.5, 2.0], [1, 0.5, 4.5])
    subject_scores = []
    for subject, (model, weights) in enumerate(
            zip(models, per_subject_weights), start=1):
        label = 'subject_%s_%s_weighted_%s_epochs' % (subject, model_type,
                                                       epochs)
        subject_scores.append(predict(X_test_A, X_test_B,
                                      model=model,
                                      type=model_type,
                                      weights=weights,
                                      label=label))
    y_subject1_score, y_subject2_score, y_subject3_score = subject_scores

    ##################################
    ### all subjects model prediction
    export(y_subject1_score * 0.33 + y_subject2_score * 0.33
           + y_subject3_score * 0.33,
           'all_subjects_%s_%s_epochs' % (model_type, epochs))

    ##################################
    ### all subjects model prediction (weighted)
    export((y_subject1_score * 0.5 + y_subject2_score * 0
            + y_subject3_score * 0.5) * [1.5, 0.8, 1.6],
           'all_subjects_%s_weighted_%s_epochs' % (model_type, epochs))

    print("DONE")
def destroy(self):
    """Mark the object as collided and switch its artwork to the splat image.

    Sets ``status`` to ``"Collided"``, replaces ``image`` with a one-element
    list holding ``art/orange/orange_splat_small.png`` (loaded from the data
    directory and converted with ``convert_alpha``), then calls
    ``body.reset_forces()``.
    """
    self.status = "Collided"
    splat_path = os.path.join(data_dir(), 'art', 'orange',
                              'orange_splat_small.png')
    splat = image.load(splat_path).convert_alpha()
    self.image = [splat]
    self.body.reset_forces()
def main(window, handle_events, score):
    """Run the outro ("You scored ...") screen.

    Builds a stage-3 ``World`` with two platforms joined by three springs,
    attaches the score text and the replay/quit hint, and then repeats: poll
    events, update the world, draw it, flip the display.

    Parameters:
        window: passed through to ``handle_events`` and ``Render``.
        handle_events: callable ``(window, world)`` polled once per frame.
        score: number rendered with the ``'% 2d'`` format.

    Returns:
        False when ``handle_events`` returns 2 (a non-Escape key was
        pressed); otherwise the first truthy value ``handle_events`` returns.
    """
    world = World()
    world.stage = 3

    upper = Platform(600, 300, 400, 50)
    world.add_item(upper)
    lower = Platform(500, 600, 800, 100)
    world.add_item(lower)

    # vert order (original author's note):
    #   0 3
    #   1 2
    rest_length, stiffness, damping = 200, 10, 1
    spring_specs = (
        (lambda vs: vs[1], lambda vs: vs[0], stiffness),
        (lambda vs: vs[2], lambda vs: vs[3], stiffness),
        # the middle spring is ten times stiffer than the two edge springs
        (lambda vs: (vs[1] + vs[3]) / 2,
         lambda vs: (vs[1] + vs[3]) / 2,
         10 * stiffness),
    )
    for anchor_a, anchor_b, spring_stiffness in spring_specs:
        world.add_spring(Spring(upper, lower, anchor_a, anchor_b,
                                rest_length, spring_stiffness, damping))

    font_path = os.path.join(data_dir(), "fonts", "vinque", "vinque.ttf")
    white = (255, 255, 255)
    black = (0, 0, 0)

    # Heading on the upper platform, one Word per rendered token.
    heading_font = font.Font(font_path, 48)
    heading_surfaces = [heading_font.render(token, True, black)
                        for token in 'You scored'.split()]
    heading_positions = (
        (150, 60),
        (550, 60),
    )
    for rendered, where in zip(heading_surfaces, heading_positions):
        world.add_word(Word(upper, rendered, where))

    # The score itself, in large type on the lower platform.
    score_font = font.Font(font_path, 96)
    score_surface = score_font.render('% 2d' % score, True, white)
    world.add_word(Word(lower, score_surface, (230, 350)))

    # Replay / quit hint.
    hint_font = font.Font(font_path, 24)
    hint = hint_font.render('space to play again, escape to quit',
                            True, black)
    world.add_word(Word(lower, hint, (200, 550)))

    render = Render(window, world)
    while True:
        outcome = handle_events(window, world)
        if outcome == 2:
            # non-esc keypressed
            return False
        if outcome:
            return outcome
        world.update()
        render.draw_world()
        display.flip()
def main(window, handle_events, score):
    """Run the outro ("You scored ...") screen.

    Builds a stage-3 ``World`` with two platforms joined by three springs,
    attaches the score text and the replay/quit hint, and then repeats: poll
    events, update the world, draw it, flip the display.

    Parameters:
        window: passed through to ``handle_events`` and ``Render``.
        handle_events: callable ``(window, world)`` polled once per frame.
        score: number rendered with the ``'% 2d'`` format.

    Returns:
        False when ``handle_events`` returns 2 (a non-Escape key was
        pressed); otherwise the first truthy value ``handle_events`` returns.
    """
    world = World()
    world.stage = 3

    upper = Platform(600, 300, 400, 50)
    world.add_item(upper)
    lower = Platform(500, 600, 800, 100)
    world.add_item(lower)

    # vert order (original author's note):
    #   0 3
    #   1 2
    rest_length, stiffness, damping = 200, 10, 1
    spring_specs = (
        (lambda vs: vs[1], lambda vs: vs[0], stiffness),
        (lambda vs: vs[2], lambda vs: vs[3], stiffness),
        # the middle spring is ten times stiffer than the two edge springs
        (lambda vs: (vs[1] + vs[3])/2,
         lambda vs: (vs[1] + vs[3])/2,
         10*stiffness),
    )
    for anchor_a, anchor_b, spring_stiffness in spring_specs:
        world.add_spring(Spring(upper, lower, anchor_a, anchor_b,
                                rest_length, spring_stiffness, damping))

    font_path = os.path.join(data_dir(), "fonts", "vinque", "vinque.ttf")
    white = (255, 255, 255)
    black = (0, 0, 0)

    # Heading on the upper platform, one Word per rendered token.
    heading_font = font.Font(font_path, 48)
    heading_surfaces = [heading_font.render(token, True, black)
                        for token in 'You scored'.split()]
    heading_positions = (
        (150, 60),
        (550, 60),
    )
    for rendered, where in zip(heading_surfaces, heading_positions):
        world.add_word(Word(upper, rendered, where))

    # The score itself, in large type on the lower platform.
    score_font = font.Font(font_path, 96)
    score_surface = score_font.render('% 2d' % score, True, white)
    world.add_word(Word(lower, score_surface, (230, 350)))

    # Replay / quit hint.
    hint_font = font.Font(font_path, 24)
    hint = hint_font.render('space to play again, escape to quit',
                            True, black)
    world.add_word(Word(lower, hint, (200, 550)))

    render = Render(window, world)
    while True:
        outcome = handle_events(window, world)
        if outcome == 2:
            # non-esc keypressed
            return False
        if outcome:
            return outcome
        world.update()
        render.draw_world()
        display.flip()
from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split from sklearn.naive_bayes import GaussianNB import logging import tensorflow as tf # hello = tf.constant('Hello tf!') # sess = tf.Session() # print(sess.run(hello)) log = logging.getLogger(__name__) #data_folder = '/Users/dmitrykazakov/Desktop/Studium/MSc/2. Semester/ML/projects/task4_s8n2k3nd/data/ex4/' data_folder = os.path.join(dt.data_dir(), dt.DataSets.EX4.value) def input_eval_set(feature_names, feature_values, class_labels=None): features = dict( zip(feature_names, np.matrix(feature_values).transpose().tolist())) if class_labels is None: return features else: labels = np.int32(np.array(class_labels)) return features, labels def train_input_fn(features, labels, batch_size):
NOTE: not using pygames channel queueing as it only allows one sound to be queued. Also the sound can only be queued on a certain channel. """ from pygame import mixer import os import glob #the as alias allows re-imports from cyclic_list import cyclic_list as cyclic_list_func from data import data_dir SOUND_PATH = os.path.join(data_dir(), "sounds") def get_sound_list(path = SOUND_PATH): """ gets a list of sound names without thier path, or extension. """ # load a list of sounds without path at the beginning and .ogg at the end. sound_list = [] for ext in ['.wav', '.ogg']: sound_list += map(lambda x:x[len(path)+1:-4], #glob.glob(os.path.join(path,"*.ogg")) glob.glob(os.path.join(path,"*" + ext)) ) return sound_list