def testResizing():
    testdata = config.data['datafile']['test']
    dl = DataLoader()
    data = dl.read(testdata[0])
    print(len(data))
    newdata = BoxStretch(data, 600)
    ml = MagneticLine()
    ml.addLine(data, "origin")
    ml.addLine(newdata, "newline")
    ml.show()
def load(self, fileList):
    loader = DataLoader()
    self.standard = loader.read(fileList[0])
    self.sampleSize = len(self.standard)
    data = []
    # extend all data to same length
    for dataFile in fileList:
        line = loader.read(dataFile)
        dist, path = dtwDistance(self.standard, line)
        lineData = dtwExtend(line, path)
        data.append(lineData)
    self.data = data
def main():
    loader = DataLoader()
    data = loader.read(config.data["datafile"]["test"][0])

    # connect to the local endpoint as a REQ (request/reply) client
    context = zmq.Context()
    publisher = context.socket(zmq.REQ)
    publisher.connect("tcp://127.0.0.1:9988")

    index = 20
    path_particles_count = len(data)
    while True:
        currPos = float(index) / path_particles_count * 18.18
        publisher.send(json.dumps({"data": data[index - 5:index], "id": 1, "currPos": currPos}))
        recv = publisher.recv()
        print(json.loads(recv))
        time.sleep(0.5)
        index += 5
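# The REQ client above expects a reply for every message it sends. Below is a minimal
# sketch of a matching REP-side responder; the acknowledgement payload and the loop body
# are assumptions for illustration, not part of the original project.
import json
import zmq


def reply_loop():
    context = zmq.Context()
    responder = context.socket(zmq.REP)
    responder.bind("tcp://127.0.0.1:9988")
    while True:
        message = json.loads(responder.recv())           # blocks until the REQ side sends
        ack = {"id": message.get("id"), "status": "ok"}  # hypothetical acknowledgement payload
        responder.send(json.dumps(ack).encode("utf-8"))  # REP must answer before the next recv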
def testGraphing():
    # import scipy.misc.imresize
    testdata = config.data['datafile']['test']
    dl = DataLoader()
    l = dl.read(testdata[0])
    l2 = dl.read(testdata[1])
    l3 = dl.read(testdata[2])
    ml = MagneticLine()
    # l = np.array(l)
    # l1 = np.kron(l, [0.5, 0.5])
    # l2 = np.kron(l, [1, 1])
    # l3 = np.kron(l, [1.5, 1.5])
    # l2 = np.array(l2)
    # l2 = np.kron(l2, [1,2])
    ml.addLine(l, "a0")
    # ml.addLine(l1, "a1")
    ml.addLine(l2, "a1")
    ml.addLine(l3, "b0")
    ml.show()
def main():
    paths = "/home/nikoscf/PycharmProjects/BookRecommendation/configurations/paths.yml"
    load_begin = DataLoader()
    load_begin.read_paths(paths)

    # Uncomment to execute this once: if the dataset is a .zip, it is unzipped into the
    # absolute dir set in paths.yml; it then checks for the .csv files and removes the
    # redundant zip folder.
    # load_begin.check_zip_and_csv()

    books = load_begin.read_data("BX-Books.csv")
    users = load_begin.read_data("BX-Users.csv")
    ratings = load_begin.read_data("BX-Book-Ratings.csv")

    to_drop_columns = ['Image-URL-S', 'Image-URL-M', 'Image-URL-L']
    numeric_col_to_nan = ["Year-Of-Publication"]
    data_books = DataClean(books)
    clean_books = data_books.execute_pipeline_cleaning(to_drop_columns, numeric_col_to_nan)

    to_drop_columns = []
    numeric_col_to_nan = ["User-ID", "Age"]
    data_users = DataClean(users)
    clean_users = data_users.execute_pipeline_cleaning(to_drop_columns, numeric_col_to_nan)

    to_drop_columns = []
    numeric_col_to_nan = ["User-ID", "ISBN", "Book-Rating"]
    data_ratings = DataClean(ratings)
    clean_ratings = data_ratings.execute_pipeline_cleaning(to_drop_columns, numeric_col_to_nan)

    data_analysis = DataAnalysis()
    ratings_pivoted = data_analysis.execute_pipeline_data_analysis(clean_ratings, clean_users, clean_books)
    return ratings_pivoted
def testGraphing():
    testdata = config.data['datafile']['test']
    dl = DataLoader()
    l = dl.read(testdata[0])
    l2 = dl.read(testdata[1])
    l3 = dl.read(testdata[2])
    ml = MagneticLine()
    dist, path = dtwDistance(l, l2)
    lineData = dtwExtend(l2, path)
    print(path)
    # l = np.array(l)
    # l1 = np.kron(l, [0.5, 0.5])
    # l2 = np.kron(l, [1, 1])
    # l3 = np.kron(l, [1.5, 1.5])
    # l2 = np.array(l2)
    # l2 = np.kron(l2, [1,2])
    ml.addLine(l, "a0")
    # ml.addLine(l1, "a1")
    ml.addLine(l2, "a1")
    ml.addLine(lineData, "b0")
    ml.show()
def init_vectorizers(log=True):
    """
    Initializes vectorizers.
    """
    if log:
        print("Initializing vectorizers...", end="\r")

    # Create DataLoaders for train and full for the vectorizers
    trainD = DataLoader()
    trainD.loadData('../dataset/dataset-train.npy')
    fullD = DataLoader()
    fullD.loadData('../dataset/dataset.npy')

    # Create the vectorizers
    return [
        TFIDFRequestTextVectorizer(trainD),
        HelperIDVectorizer(fullD),
        CourseIDVectorizer(),
        RequestTimeVectorizer(),
        StudentVectorizer(fullD),
        PastRequestsVectorizer(fullD),
        DueDateVectorizer()
    ]
def generate_inference_file(filename='test.tsv', cache=True):
    if cache:
        return pickle.load(open(f'{abspath}/ser/sents.ser', 'rb'))
    else:
        sents = []
        tokenized_sents = []
        pairs = DataLoader('srcdata2')
        pairs = pairs + get_augmented_data() + get_test_data()
        for pair in tqdm(pairs, desc='Tokenizing sentences'):
            sents.append(pair[0])
            tokenized_sents.append(CustomTokenizer(pair[0]))
            sents.append(pair[1])
            tokenized_sents.append(CustomTokenizer(pair[1]))
        pickle.dump(sents, open(f'{abspath}/ser/sents.ser', 'wb'))
        inf_file = open(filename, 'w')
        for sent in tqdm(tokenized_sents, desc='Writing tokenized sentences'):
            for token in sent:
                inf_file.write(f'{token.strip()}\tO\n')
            inf_file.write('\n')
        return sents
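# A minimal usage sketch, assuming the module-level names used above (abspath,
# CustomTokenizer, the data helpers) are available: rebuild the file once with the cache
# disabled, then reuse the pickled sentences on later calls.
sentences = generate_inference_file('test.tsv', cache=False)  # tokenizes pairs and writes test.tsv
cached = generate_inference_file(cache=True)                  # loads {abspath}/ser/sents.ser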
command_parser.add_argument('-m', '--model', type=str)
command_parser.add_argument('-b', '--buckets', type=str, default="24w")
command_parser.add_argument('-l', '--lr', type=float, default=0.01)
command_parser.set_defaults(func=lambda: 'test')

ARGS = parser.parse_args()
if 'func' not in ARGS or ARGS.func is None:
    parser.print_help()
elif ARGS.time not in ['w', 'h', 't']:
    print("ERROR: invalid time '%s'" % ARGS.time)
else:
    with tf.Graph().as_default():
        model = createModel(ARGS)
        loader = DataLoader()
        vectorizers = init_vectorizers()

        # Filter out bad requests if we are training on help time
        if ARGS.time == 'h':
            loader.loadData(ARGS.data.name, filterFn=lambda x: x.getHelpTimeMinutes() >= 2.0)
        else:
            loader.loadData(ARGS.data.name)

        # Training
        if ARGS.func() == 'train':
            model.run(loader, vectorizers, ARGS.time, run_type='train')
            train_loss = model.run(loader, vectorizers,
def __init__(self, method):
    print('USING METHOD: {}'.format(method))

    # Read lyrics dataset and get train/test splits
    dl = DataLoader()
    self.train_x, self.train_y, self.test_x, self.test_y = dl.load(method)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Jun 16 17:33:23 2019

@author: ishanshrivastava
"""

from data.DataLoader import DataLoader
# from features.CustomTokenizer import CustomTokenizer

pairs = DataLoader("<srcdata>")
# print(CustomTokenizer(pairs[3][0]))
def loadFileGraph(self, filename):
    filepath = './data/' + filename
    if os.path.isfile(filepath):
        loader = DataLoader()
        self.setLine(loader.read(filepath))
        self.draw()
from config.Config import config
from data.DataLoader import DataLoader
from algorithm.Dtw import dtwCalculate

testdatafilename = config.data['datafile']['test']
dl = DataLoader()
line1 = dl.read(testdatafilename[0])
sub = line1[100:105]
dist, position = dtwCalculate(sub, line1)
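# For reference, a minimal sketch of the dynamic-programming recurrence behind DTW distance,
# assuming 1-D numeric sequences and absolute difference as the local cost. It illustrates the
# general idea only; the project's dtwCalculate (which also returns a match position for the
# subsequence) is not reproduced here.
def dtw_distance_sketch(a, b):
    n, m = len(a), len(b)
    inf = float('inf')
    # cost[i][j] = DTW distance between the prefixes a[:i] and b[:j]
    cost = [[inf] * (m + 1) for _ in range(n + 1)]
    cost[0][0] = 0.0
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            d = abs(a[i - 1] - b[j - 1])
            cost[i][j] = d + min(cost[i - 1][j],      # insertion
                                 cost[i][j - 1],      # deletion
                                 cost[i - 1][j - 1])  # match
    return cost[n][m]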
import tensorflow as tf
from data.DataLoader import DataLoader
from ClassificationNNModel import ClassificationNNModel
from random import uniform
from RunNN import Config

if __name__ == "__main__":
    config = Config("h", 10, "classification", 5)
    results = []
    for i in range(100):
        print("Iteration %i" % i)
        # sample the learning rate log-uniformly between 1e-6 and 1e-2
        config.lr = 10**uniform(-2, -6)
        with tf.Graph().as_default():
            model = ClassificationNNModel(config)
            loader = DataLoader()
            loader.loadData("../dataset/dataset-train.npy",
                            filterFn=lambda x: x.getHelpTimeMinutes() >= 2.0,
                            log=False)
            model.run(loader, "h", train=True, log=False)
            loss = model.run(loader, "h", train=False, log=False)
            results.append((config.lr, loss))
    print(sorted(results, key=lambda r: r[1]))
import keras

from core.utils import AucHistory
from core.VideoPixelNetwork import VideoPixelNetwork
from data.DataLoader import DataLoader

frames_count = 10
frames_step = 5  # >= 1

data_loader = DataLoader('data/ped1_train.txt', 'data/ped1_test.txt',
                         frames_count, frames_step, validation_split=0.1)
train_generator = data_loader.train_generator(batch_size=2)
validation_generator = data_loader.validation_generator(batch_size=5)
frame_shape = train_generator.X_shape

filters = 16
k_encoder = 20
lstm_filters = 32
k_decoder = 32
dilation = True
decoder_kernel_size = 5

vpn = VideoPixelNetwork(filters, frame_shape, frames_count,
                        k_encoder=k_encoder,
                        lstm_filters=lstm_filters,
                        k_decoder=k_decoder,
from data.DataLoader import DataLoader
import matplotlib.pyplot as plt
from collections import Counter
from util import make_buckets, make_bucket_mapper

if __name__ == "__main__":
    d = DataLoader()
    d.loadData('../dataset/dataset.npy')
    help_vals = [r.getHelpTimeMinutes() for r in d.laIRRequests]
    wait_vals = [r.getWaitTimeMinutes() for r in d.laIRRequests]
    bucket_vals = [i for i in range(0, 120, 10)] + [float('inf')]
    plt.hist([help_vals, wait_vals], bucket_vals, label=["Help Time", "Wait Time"])
    plt.title("CS106 LaIR Wait and Help Times")
    plt.xlabel("Time (minutes)")
    plt.ylabel("# Requests")
    plt.legend()
    plt.show()
from data.DataLoader import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import os

if __name__ == '__main__':
    dl = DataLoader()
    train_x, train_y, test_x, test_y = dl.load('binary')
    genres = np.concatenate((train_y, test_y))
    print(len(genres))

    # read genre names
    names = []
    datadir = os.path.join(os.path.dirname(__file__), 'data/genresList.txt')
    with open(datadir) as file:
        for line in file:
            names.append(line.strip().lower())

    genres = list(map(lambda x: names[x], genres))
    counts = []
    for i in range(len(names)):
        c = len(list(filter(lambda x: x == names[i], genres)))
        counts.append(c)

    plt.rc('axes', axisbelow=True)
    plt.grid(b=True, axis='x', color='#eeeeee', zorder=-1)
    plt.ylabel('Genre', labelpad=15, fontsize=18, color='#555555')
    plt.xlabel('Number of songs', labelpad=15, fontsize=18, color='#555555')
    plt.title('Genre Distribution', pad=15, fontsize=20, color='#555555')
from data.DataLoader import DataLoader

loader = DataLoader()
datapack = loader.datasource()
while True:
    print(datapack.capture())
def run(ModelType, args):
    print("\n********* %s %s Model *********" % (
        "Logistic" if ModelType == LogisticRegression else "Linear",
        "Wait" if args.time == 'w' else "Help"))

    vectorizers = init_vectorizers()
    trainLoader = DataLoader()
    evaluateLoader = DataLoader()
    testLoader = DataLoader()

    # Filter out bad requests if we are running on help time
    if args.time == 'h':
        trainLoader.loadData('../dataset/dataset-train.npy', filterFn=lambda x: x.getHelpTimeMinutes() >= 2.0)
        evaluateLoader.loadData('../dataset/dataset-dev.npy', filterFn=lambda x: x.getHelpTimeMinutes() >= 2.0)
        testLoader.loadData('../dataset/dataset-test.npy', filterFn=lambda x: x.getHelpTimeMinutes() >= 2.0)
    else:
        trainLoader.loadData('../dataset/dataset-train.npy')
        evaluateLoader.loadData('../dataset/dataset-dev.npy')
        testLoader.loadData('../dataset/dataset-test.npy')

    # Logistic regression predicts a time bucket; linear regression predicts minutes directly
    if ModelType == LogisticRegression:
        buckets = make_buckets(trainLoader, args.buckets, args.time)
        mapper = make_bucket_mapper(buckets)
    else:
        mapper = lambda x: x

    labelFn = lambda x: mapper(x.getWaitTimeMinutes() if args.time == 'w' else x.getHelpTimeMinutes())
    trainLabels = trainLoader.getLabels(labelFn)
    trainInputs = trainLoader.applyVectorizers(vectorizers, "train", args.time)
    devLabels = evaluateLoader.getLabels(labelFn)
    devInputs = evaluateLoader.applyVectorizers(vectorizers, "dev", args.time)
    testLabels = testLoader.getLabels(labelFn)
    testInputs = testLoader.applyVectorizers(vectorizers, "test", args.time)

    trainedModel = trainModel(ModelType, trainInputs, trainLabels)
    evaluateModel(trainedModel, devInputs, devLabels)
    evaluateModel(trainedModel, testInputs, testLabels)