Example #1
def testResizing():
    testdata = config.data['datafile']['test']
    dl = DataLoader()
    data = dl.read(testdata[0])
    print(len(data))
    newdata = BoxStretch(data, 600)
    ml = MagneticLine()
    ml.addLine(data, "origin")
    ml.addLine(newdata, "newline")
    ml.show()
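Note: BoxStretch is not defined in these examples. As an illustration only, not the project's implementation, a line can be resampled to a fixed number of points with plain NumPy interpolation:

import numpy as np

def stretch_line(values, target_len=600):
    # Resample a 1-D sequence to target_len points by linear interpolation.
    values = np.asarray(values, dtype=float)
    old_x = np.linspace(0.0, 1.0, num=len(values))
    new_x = np.linspace(0.0, 1.0, num=target_len)
    return np.interp(new_x, old_x, values)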
Example #2
    def load(self, fileList):
        loader          = DataLoader()
        self.standard   = loader.read(fileList[0])
        self.sampleSize = len(self.standard)
        data            = []
        # extend all data to same length
        for dataFile in fileList:
            line       = loader.read(dataFile)
            dist, path = dtwDistance(self.standard, line)
            lineData   = dtwExtend(line, path)
            data.append(lineData)

        self.data = data
Example #3
def main():
    loader = DataLoader()
    data = loader.read(config.data["datafile"]["test"][0])
    context = zmq.Context()
    publisher = context.socket(zmq.REQ)
    publisher.connect("tcp://127.0.0.1:9988")

    index = 20
    path_particles_count = len(data)

    while True:
        currPos = float(index) / path_particles_count * 18.18
        publisher.send_string(json.dumps({"data": data[index - 5 : index], "id": 1, "currPos": currPos}))
        recv = publisher.recv()
        print(json.loads(recv))
        time.sleep(0.5)
        index += 5
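Example #3 shows only the requesting side; the service listening on tcp://127.0.0.1:9988 is not included. A minimal sketch of a matching REP endpoint, assuming it only acknowledges each chunk, could look like this:

import json
import zmq

context = zmq.Context()
responder = context.socket(zmq.REP)
responder.bind("tcp://127.0.0.1:9988")

while True:
    message = json.loads(responder.recv())           # one JSON chunk from the REQ side
    reply = {"id": message.get("id"), "ack": True}   # acknowledge it
    responder.send_string(json.dumps(reply))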
Example #4
def testGraphing():
    # import scipy.misc.imresize
    testdata = config.data['datafile']['test']
    dl = DataLoader()
    l = dl.read(testdata[0])
    l2 = dl.read(testdata[1])
    l3 = dl.read(testdata[2])
    ml = MagneticLine()
    # l = np.array(l)
    # l1 = np.kron(l, [0.5, 0.5])
    # l2 = np.kron(l, [1, 1])
    # l3 = np.kron(l, [1.5, 1.5])

    # l2 = np.array(l2)
    # l2 = np.kron(l2, [1,2])
    ml.addLine(l, "a0")
    # ml.addLine(l1, "a1")
    ml.addLine(l2, "a1")
    ml.addLine(l3, "b0")
    ml.show()
Example #5
def main():
    paths = "/home/nikoscf/PycharmProjects/BookRecommendation/configurations/paths.yml"
    load_begin = DataLoader()
    load_begin.read_paths(paths)

    # Uncomment to run this once: if the download is a .zip it is unzipped into the absolute dir set in paths.yml,
    # then the .csv files are checked and the redundant zip folder is removed.
    # load_begin.check_zip_and_csv()

    books = load_begin.read_data("BX-Books.csv")
    users = load_begin.read_data("BX-Users.csv")
    ratings = load_begin.read_data("BX-Book-Ratings.csv")

    to_drop_columns = ['Image-URL-S', 'Image-URL-M', 'Image-URL-L']
    numeric_col_to_nan = ["Year-Of-Publication"]
    data_books = DataClean(books)
    clean_books = data_books.execute_pipeline_cleaning(to_drop_columns,
                                                       numeric_col_to_nan)

    to_drop_columns = []
    numeric_col_to_nan = ["User-ID", "Age"]
    data_users = DataClean(users)
    clean_users = data_users.execute_pipeline_cleaning(to_drop_columns,
                                                       numeric_col_to_nan)

    to_drop_columns = []
    numeric_col_to_nan = ["User-ID", "ISBN", "Book-Rating"]
    data_ratings = DataClean(ratings)
    clean_ratings = data_ratings.execute_pipeline_cleaning(
        to_drop_columns, numeric_col_to_nan)

    data_analysis = DataAnalysis()
    ratings_pivoted = data_analysis.execute_pipeline_data_analysis(
        clean_ratings, clean_users, clean_books)
    return ratings_pivoted
Example #6
def testGraphing():
    testdata = config.data['datafile']['test']
    dl = DataLoader()
    l = dl.read(testdata[0])
    l2 = dl.read(testdata[1])
    l3 = dl.read(testdata[2])
    ml = MagneticLine()
    dist, path = dtwDistance(l, l2)
    lineData = dtwExtend(l2, path)

    print(path)
    # l = np.array(l)
    # l1 = np.kron(l, [0.5, 0.5])
    # l2 = np.kron(l, [1, 1])
    # l3 = np.kron(l, [1.5, 1.5])

    # l2 = np.array(l2)
    # l2 = np.kron(l2, [1,2])
    ml.addLine(l, "a0")
    # ml.addLine(l1, "a1")
    ml.addLine(l2, "a1")
    ml.addLine(lineData, "b0")
    ml.show()
Example #7
def init_vectorizers(log=True):
	"""
	Initializes vectorizers.
	"""
	if log: print("Initializing vectorizers...", end="\r")

	# Create DataLoaders for train and full for the vectorizers
	trainD = DataLoader()
	trainD.loadData('../dataset/dataset-train.npy')
	fullD = DataLoader()
	fullD.loadData('../dataset/dataset.npy')

	# Create the vectorizers
	return [
		TFIDFRequestTextVectorizer(trainD),
		HelperIDVectorizer(fullD),
		CourseIDVectorizer(),
		RequestTimeVectorizer(),
		StudentVectorizer(fullD),
		PastRequestsVectorizer(fullD),
		DueDateVectorizer()
	]
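As Example #19 below shows, the vectorizers returned here are later passed to DataLoader.applyVectorizers. A minimal usage sketch based on that call (the 'w' wait-time flag is an assumption here):

vectorizers = init_vectorizers()
loader = DataLoader()
loader.loadData('../dataset/dataset-train.npy')
inputs = loader.applyVectorizers(vectorizers, "train", "w")
labels = loader.getLabels(lambda r: r.getWaitTimeMinutes())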
Example #8
def generate_inference_file(filename='test.tsv', cache=True):
    if cache:
        return pickle.load(open(f'{abspath}/ser/sents.ser', 'rb'))
    else:
        sents = []
        tokenized_sents = []
        pairs = DataLoader('srcdata2')
        pairs = pairs + get_augmented_data() + get_test_data()
        for pair in tqdm(pairs, desc='Tokenizing sentences'):
            sents.append(pair[0])
            tokenized_sents.append(CustomTokenizer(pair[0]))
            sents.append(pair[1])
            tokenized_sents.append(CustomTokenizer(pair[1]))
            pickle.dump(sents, open(f'{abspath}/ser/sents.ser', 'wb'))
        inf_file = open(filename, 'w')
        for sent in tqdm(tokenized_sents, desc='Writing tokenized sentences'):
            for token in sent:
                inf_file.write(f'{token.strip()}\tO\n')
            inf_file.write('\n')
        return sents
Example #9
    command_parser.add_argument('-m', '--model', type=str)
    command_parser.add_argument('-b', '--buckets', type=str, default="24w")
    command_parser.add_argument('-l', '--lr', type=float, default=0.01)
    command_parser.set_defaults(func=lambda: 'test')

    ARGS = parser.parse_args()

    if 'func' not in ARGS or ARGS.func is None:
        parser.print_help()
    elif ARGS.time not in ['w', 'h', 't']:
        print("ERROR: invalid time '%s'" % ARGS.time)
    else:
        with tf.Graph().as_default():

            model = createModel(ARGS)
            loader = DataLoader()
            vectorizers = init_vectorizers()

            # Filter out bad requests if we are training on help time
            if ARGS.time == 'h':
                loader.loadData(
                    ARGS.data.name,
                    filterFn=lambda x: x.getHelpTimeMinutes() >= 2.0)
            else:
                loader.loadData(ARGS.data.name)

            # Training
            if ARGS.func() == 'train':
                model.run(loader, vectorizers, ARGS.time, run_type='train')
                train_loss = model.run(loader,
                                       vectorizers,
Example #10
def __init__(self, method):
    print('USING METHOD: {}'.format(method))
    # Read lyrics dataset and get train/test splits
    dl = DataLoader()
    self.train_x, self.train_y, self.test_x, self.test_y = dl.load(method)
Example #11
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Jun 16 17:33:23 2019

@author: ishanshrivastava
"""

from data.DataLoader import DataLoader
#from features.CustomTokenizer import CustomTokenizer

pairs = DataLoader("<srcdata>")
#print(CustomTokenizer(pairs[3][0]))
Example #12
def loadFileGraph(self, filename):
    filepath = './data/' + filename
    if os.path.isfile(filepath):
        loader = DataLoader()
        self.setLine(loader.read(filepath))
        self.draw()
Example #13
from config.Config import config
from data.DataLoader import DataLoader
from algorithm.Dtw import dtwCalculate

testdatafilename = config.data['datafile']['test']
dl = DataLoader()
line1 = dl.read(testdatafilename[0])

sub = line1[100:105]

dist, position = dtwCalculate(sub, line1)
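dtwCalculate (like dtwDistance and dtwExtend above) comes from the project's algorithm.Dtw module and is not reproduced here. For reference only, a minimal self-contained dynamic-time-warping distance, not the module's implementation, can be written as:

def dtw_distance(a, b):
    # Classic O(len(a) * len(b)) DTW with absolute-difference local cost.
    inf = float('inf')
    n, m = len(a), len(b)
    cost = [[inf] * (m + 1) for _ in range(n + 1)]
    cost[0][0] = 0.0
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            d = abs(a[i - 1] - b[j - 1])
            cost[i][j] = d + min(cost[i - 1][j],      # insertion
                                 cost[i][j - 1],      # deletion
                                 cost[i - 1][j - 1])  # match
    return cost[n][m]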
Example #14
import tensorflow as tf
from data.DataLoader import DataLoader
from ClassificationNNModel import ClassificationNNModel
from random import uniform
from RunNN import Config

if __name__ == "__main__":
    config = Config("h", 10, "classification", 5)

    results = []
    for i in range(100):
        print("Iteration %i" % i)
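        # sample the learning rate log-uniformly between 1e-6 and 1e-2 (uniform() accepts bounds in either order)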
        config.lr = 10**uniform(-2, -6)

        with tf.Graph().as_default():
            model = ClassificationNNModel(config)
            loader = DataLoader()
            loader.loadData("../dataset/dataset-train.npy",
                            filterFn=lambda x: x.getHelpTimeMinutes() >= 2.0,
                            log=False)
            model.run(loader, "h", train=True, log=False)
            loss = model.run(loader, "h", train=False, log=False)
            results.append((config.lr, loss))

    print(sorted(results, key=lambda r: r[1]))
Example #15
import keras

from core.utils import AucHistory
from core.VideoPixelNetwork import VideoPixelNetwork
from data.DataLoader import DataLoader

frames_count = 10
frames_step = 5  # >= 1
data_loader = DataLoader('data/ped1_train.txt',
                         'data/ped1_test.txt',
                         frames_count,
                         frames_step,
                         validation_split=0.1)

train_generator = data_loader.train_generator(batch_size=2)
validation_generator = data_loader.validation_generator(batch_size=5)
frame_shape = train_generator.X_shape

filters = 16
k_encoder = 20
lstm_filters = 32
k_decoder = 32
dilation = True
decoder_kernel_size = 5

vpn = VideoPixelNetwork(filters,
                        frame_shape,
                        frames_count,
                        k_encoder=k_encoder,
                        lstm_filters=lstm_filters,
                        k_decoder=k_decoder,
Example #16
from data.DataLoader import DataLoader
import matplotlib.pyplot as plt
from collections import Counter
from util import make_buckets, make_bucket_mapper

if __name__ == "__main__":
    d = DataLoader()
    d.loadData('../dataset/dataset.npy')

    help_vals = [r.getHelpTimeMinutes() for r in d.laIRRequests]
    wait_vals = [r.getWaitTimeMinutes() for r in d.laIRRequests]

    bucket_vals = [i for i in range(0, 120, 10)] + [float('inf')]

    plt.hist([help_vals, wait_vals],
             bucket_vals,
             label=["Help Time", "Wait Time"])
    plt.title("CS106 LaIR Wait and Help Times")
    plt.xlabel("Time (minutes)")
    plt.ylabel("# Requests")
    plt.legend()
    plt.show()
Example #17
from data.DataLoader import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import os

if __name__ == '__main__':
    dl = DataLoader()
    train_x, train_y, test_x, test_y = dl.load('binary')

    genres = np.concatenate((train_y, test_y))
    print(len(genres))

    # read genrenames
    names = []
    datadir = os.path.join(os.path.dirname(__file__), 'data/genresList.txt')
    with open(datadir) as file:
        for line in file:
            names.append(line.strip().lower())

    genres = list(map(lambda x: names[x], genres))

    counts = []
    for i in range(len(names)):
        c = len(list(filter(lambda x: x == names[i], genres)))
        counts.append(c)

    plt.rc('axes', axisbelow=True)
    plt.grid(b=True, axis='x', color='#eeeeee', zorder=-1)
    plt.ylabel('Genre', labelpad=15, fontsize=18, color='#555555')
    plt.xlabel('Number of songs', labelpad=15, fontsize=18, color='#555555')
    plt.title('Genre Distribution', pad=15, fontsize=20, color='#555555')
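The per-genre counting loop above rescans the full genre list once per name; an equivalent single-pass alternative (illustrative only, not part of the original script) uses collections.Counter:

from collections import Counter

genre_counts = Counter(genres)                          # one pass over all labels
counts = [genre_counts.get(name, 0) for name in names]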
Example #18
from data.DataLoader import DataLoader

loader = DataLoader()
datapack = loader.datasource()

while True:
	print(datapack.capture())
Example #19
def run(ModelType, args):
	print("\n********* %s %s Model *********" % (("Logistic" if ModelType == LogisticRegression else "Linear"), ("Wait" if args.time == 'w' else "Help")))
	vectorizers = init_vectorizers()
	trainLoader = DataLoader()
	evaluateLoader = DataLoader()
	testLoader = DataLoader()

	# Filter out bad requests if we are running on help time
	if args.time == 'h':
		trainLoader.loadData('../dataset/dataset-train.npy', filterFn=lambda x: x.getHelpTimeMinutes() >= 2.0)
		evaluateLoader.loadData('../dataset/dataset-dev.npy', filterFn=lambda x: x.getHelpTimeMinutes() >= 2.0)
		testLoader.loadData('../dataset/dataset-test.npy', filterFn=lambda x: x.getHelpTimeMinutes() >= 2.0)
	else:
		trainLoader.loadData('../dataset/dataset-train.npy')
		evaluateLoader.loadData('../dataset/dataset-dev.npy')
		testLoader.loadData('../dataset/dataset-test.npy')

	if ModelType == LogisticRegression:
		buckets = make_buckets(trainLoader, args.buckets, args.time)
		mapper = make_bucket_mapper(buckets)
	else:
		mapper = lambda x: x

	labelFn = lambda x: mapper(x.getWaitTimeMinutes() if args.time == 'w' else x.getHelpTimeMinutes())
	trainLabels = trainLoader.getLabels(labelFn)
	trainInputs = trainLoader.applyVectorizers(vectorizers, "train", args.time)
	devLabels = evaluateLoader.getLabels(labelFn)
	devInputs = evaluateLoader.applyVectorizers(vectorizers, "dev", args.time)
	testLabels = testLoader.getLabels(labelFn)
	testInputs = testLoader.applyVectorizers(vectorizers, "test", args.time)

	trainedModel = trainModel(ModelType, trainInputs, trainLabels)
	evaluateModel(trainedModel, devInputs, devLabels)
	evaluateModel(trainedModel, testInputs, testLabels)