Example #1
def datasetToSource( user, dataset, pattern='.*root', readCache=False):
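    # Build a CMSSW PoolSource over the good files of the given user dataset;
    # event sorting and duplicate checking are disabled.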

    # print user, dataset, pattern
    data = createDataset(user, dataset, pattern, readCache)
    
    source = cms.Source(
        "PoolSource",
        noEventSort = cms.untracked.bool(True),
        duplicateCheckMode = cms.untracked.string("noDuplicateCheck"),
        fileNames = cms.untracked.vstring()
    )
    
    source.fileNames.extend( data.listOfGoodFiles() )

    return source
Example #2
import numpy as np
import joblib
from sklearn import svm
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix

import dataset


def CreateModel(dataset_fileName, use_mid=False, from_raw=False):

    if from_raw:
        X, y, Decoder = dataset.createDataset()
    else:
        X, y = joblib.load(dataset_fileName)
    if use_mid:
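        # keep only the middle channel (column index 2 of each block of 5)
        # for the six sensor signals: accelerometer x/y/z and gyroscope x/y/z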
        accx_mid = X[:, 2, :].reshape(100, 1, 50)
        accy_mid = X[:, 7, :].reshape(100, 1, 50)
        accz_mid = X[:, 12, :].reshape(100, 1, 50)
        gyrx_mid = X[:, 17, :].reshape(100, 1, 50)
        gyry_mid = X[:, 22, :].reshape(100, 1, 50)
        gyrz_mid = X[:, 27, :].reshape(100, 1, 50)
        X = np.concatenate(
            (accx_mid, accy_mid, accz_mid, gyrx_mid, gyry_mid, gyrz_mid),
            axis=1)
        print(X.shape)
    X = X.reshape(X.shape[0], X.shape[1] * X.shape[2])
    x_train, x_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.25,
                                                        random_state=24)
    print("split complete")
    params = {'C': [0.001, 0.01, 0.1, 1], 'kernel': ['linear', 'rbf', 'poly']}
    svc = svm.SVC(probability=True)
    clf = GridSearchCV(svc, params, verbose=10, n_jobs=8)
    print("fitting Model")
    clf.fit(x_train, y_train)

    print("Confusion Matrix:")
    Y_predicted = clf.predict(x_test)
    print(confusion_matrix(y_test, Y_predicted))
    print("\nBest estimator parameters: ")
    print(clf.best_estimator_)

    # Calculate the score of the best estimator found.
    score = clf.score(x_test, y_test)
    print("\nSCORE: {score}\n".format(score=score))

    print("Saving model....")
    if use_mid:
        joblib.dump(clf, "MODEL_MID.pkl")
    else:
        joblib.dump(clf, "MODEL.pkl")
    return score
Example #3
        ToTensor(),
        Normalize([.485, .456, .406], [.229, .224, .225]),
        RandomHorizontalFlip(),
    ]
else:
    print("without data augmentation")
    transformList = [
        Resize(opt.loadSize),
        ToTensor(),
        Normalize([.485, .456, .406], [.229, .224, .225])
    ]

transform = Compose(transformList)
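
# build the supervised and unsupervised splits for domains A and B, all with the same transform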

supervisedADataset = createDataset([opt.supervisedADataset],
                                   transform=transform,
                                   outputFile=False)[0]
supervisedBDataset = createDataset([opt.supervisedBDataset],
                                   transform=transform,
                                   outputFile=False)[0]
unsupervisedADataset = createDataset([opt.unsupervisedADataset],
                                     transform=transform,
                                     outputFile=False)[0]
unsupervisedBDataset = createDataset([opt.unsupervisedBDataset],
                                     transform=transform,
                                     outputFile=False)[0]

dataset = CycleMcdDataset(supervisedA=supervisedADataset,
                          unsupervisedA=unsupervisedADataset,
                          supervisedB=supervisedBDataset,
                          unsupervisedB=unsupervisedBDataset)
Example #4
        Resize(opt.loadSize),
        ToTensor(),
        Normalize([.485, .456, .406], [.229, .224, .225]),
        RandomHorizontalFlip(),
    ]
else:
    transformList = [
        Resize(opt.loadSize),
        ToTensor(),
        Normalize([.485, .456, .406], [.229, .224, .225])
    ]

transform = Compose(transformList)

sourceDataset = createDataset(opt.sourceDataset,
                              transform=transform,
                              outputFile=False)

targetDataset = createDataset(opt.targetDataset,
                              transform=transform,
                              outputFile=False)
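
# combine the source and target datasets into a single shuffled loader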

dataLoader = torch.utils.data.DataLoader(ConcatDataset(source=sourceDataset,
                                                       target=targetDataset),
                                         batch_size=opt.batchSize,
                                         shuffle=True)

# set visualizer

visualizer = Visualizer(opt, dataLoader.dataset).reset()
Example #5
            # append this song as a new row (pd.concat, since DataFrame.append
            # was removed in pandas 2.0)
            recommendedSongs = pd.concat([
                recommendedSongs,
                pd.Series(row, index=recommendedSongs.columns,
                          name=index).to_frame().T
            ])

    return recommendedSongs[['genre', 'track_id', 'artist_name', 'title']]


timbreFeaturesAndGenre = [
    'avg_timbre1', 'avg_timbre2', 'avg_timbre3', 'avg_timbre4', 'avg_timbre5',
    'avg_timbre6', 'avg_timbre7', 'avg_timbre8', 'avg_timbre9', 'avg_timbre10',
    'avg_timbre11', 'avg_timbre12', 'var_timbre1', 'var_timbre2',
    'var_timbre3', 'var_timbre4', 'var_timbre5', 'var_timbre6', 'var_timbre7',
    'var_timbre8', 'var_timbre9', 'var_timbre10', 'var_timbre11',
    'var_timbre12', 'track_id', 'genre'
]

datasetMinusSamples, testingDataset = createDataset()
datasetMinusSamples = removeTestingSongsFromDataset(datasetMinusSamples,
                                                    testingDataset)
formattedDataset = datasetMinusSamples[[
    'genre', 'track_id', 'artist_name', 'title', 'avg_timbre1'
]]
formattedDataset.to_csv('testingDataSet.csv', sep='\t')
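
# cluster the timbre features (means and variances) into 10 groups and use the
# cluster labels in place of the genre column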

featureVector = testingDataset[timbreFeaturesAndGenre].copy()  # copy so the 'genre' assignment below does not touch testingDataset
kMeans = KMeans(n_clusters=10)
timbreFeatures = timbreFeaturesAndGenre[:-2]
kMeans.fit(featureVector[timbreFeatures])
featureVector.loc[:, 'genre'] = kMeans.labels_

genreNumbers = featureVector[['genre']]
genreTitles = testingDataset[['genre']]
Example #6
        RandomResizedCrop(),
        Resize(opt.loadSize),
        ToTensor(),
        Normalize([.485, .456, .406], [.229, .224, .225]),
        RandomHorizontalFlip(),
    ]
else:
    transformList = [
        Resize(opt.loadSize),
        ToTensor(),
        Normalize([.485, .456, .406], [.229, .224, .225])
    ]

transform = Compose(transformList)

datasetA = createDataset([opt.datasetA],
                         transform=transform,
                         outputFile=False)

datasetB = createDataset([opt.datasetB],
                         transform=transform,
                         outputFile=False)

dataset = ConcatDataset(source=datasetA, target=datasetB)

dataLoader = torch.utils.data.DataLoader(dataset,
                                         batch_size=opt.batchSize,
                                         shuffle=True)

# set visualizer

visualizer = Visualizer(opt, dataLoader.dataset).reset()
Example #7
import sys
from dataset import createDataset
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules import TanhLayer
from time import sleep

def getArgOne():
    # if len(sys.argv) < 2:
    #     raise NameError("no argument given")
    # elif len(sys.argv) > 2:
    #     raise NameError("too much arguments given")
    return sys.argv[1]

filename = getArgOne() # read filename
((dsTrn, dsTst), (scalizer, normalizer)) = createDataset(filename) # create dataset ##############
print "Training data length:", len(dsTrn)
print "Test data length:", len(dsTst)
# for inpt, target in dsTst:
#     print inpt, target
# print dsTrn['input']
# print dsTrn['target']
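# recurrent network: input layer, two hidden layers (indim and indim // 2 units) and output layer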
net = buildNetwork(dsTrn.indim, dsTrn.indim, dsTrn.indim // 2, dsTrn.outdim,
                   bias=True, recurrent=True)  # create NN
trainer = BackpropTrainer(net, dsTrn, learningrate=0.03, momentum=0.1, verbose=True)
print("Network :", net)
sleep(3)
# for epoch in range(0, 1000):
#     print "Epoch", epoch 
#     trainer.train()
trainer.trainUntilConvergence(maxEpochs = 10000) # train NN
# test result
Example #8
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme

from blocks.extensions import FinishAfter, Printing, ProgressBar
#from blocks.extensions.saveload import load
from blocks.serialization import load
from blocks.monitoring import aggregation  # ???

from dataset import Corpus, createDataset

args = getArguments()

corpus = Corpus(open(args.corpus).read())
train_data, vocab_size = createDataset(corpus=corpus,
                                       sequence_length=750,
                                       repeat=20)

if args.mode == "train":
    seq_len = 100
    dim = 100
    feedback_dim = 100

    # Build the bricks and initialize them
    transition = GatedRecurrent(name="transition", dim=dim, activation=Tanh())
    generator = SequenceGenerator(
        Readout(
            readout_dim=vocab_size,
            source_names=["states"],  # transition.apply.states ???
            emitter=SoftmaxEmitter(name="emitter"),
            feedback_brick=LookupFeedback(vocab_size,
Example #9
import dataset
import tensorflow as tf
import cv2
import numpy as np
import matplotlib.pyplot as plt

tfrecord_path_train = r'D:\Coco\coco_train.tfrecord'  # raw string keeps the backslashes literal
dataset = dataset.createDataset(tfrecord_path_train)
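
# map COCO category ids to readable names; ids with no category (e.g. 12) are skipped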

category_map = {
    0: '',
    1: 'person',
    2: 'bicycle',
    3: 'car',
    4: 'motorcycle',
    5: 'airplane',
    6: 'bus',
    7: 'train',
    8: 'truck',
    9: 'boat',
    10: 'traffic light',
    11: 'fire hydrant',
    13: 'stop sign',
    14: 'parking meter',
    15: 'bench',
    16: 'bird',
    17: 'cat',
    18: 'dog',
    19: 'horse',
    20: 'sheep',
    21: 'cow',
Example #10
from dataset import Corpus, createDataset
from rnn_model import create_rnn


if __name__ == "__main__":
    args = parser.parse_args()

    if args.retrain:
        main_loop = load(args.model)
    else:
        # create Corpus and Dataset
        corpus = Corpus(open(args.corpus).read())
        train_data, vocab_size = createDataset(
            corpus=corpus, sequence_length=750, repeat=20)
        # create Computation Graph
        cg, layers, y_hat, cost = create_rnn(args.hidden, vocab_size, mode=args.mode)
        # create training loop
        main_loop = MainLoop(
            data_stream = DataStream(
                train_data,
                iteration_scheme = SequentialScheme(
                    train_data.num_examples,
                    batch_size = 50
                )
            ),
            algorithm = GradientDescent(
                cost  = cost,
Example #11
# PyTorch includes
import torch
from torch.autograd import Variable
# Custom includes
from options import TrainOptions
from dataset import createDataset
from models import createModel
from visualizer import ProgressVisualizer

assert Variable

parser = TrainOptions()
opt = parser.parse()

# set dataloader
trainDataset = createDataset(opt, split='train', nInput=opt.nInput)
valDataset = createDataset(opt, split='val', nInput=opt.nInput)

trainDataLoader = torch.utils.data.DataLoader(trainDataset,
                                              batch_size=opt.batchSize,
                                              shuffle=True,
                                              num_workers=opt.nThreads)

valDataLoader = torch.utils.data.DataLoader(valDataset,
                                            batch_size=opt.batchSize,
                                            shuffle=True,
                                            num_workers=opt.nThreads)
# set model

model = createModel(opt)
model.setup(opt)
Example #12
    plt.show()


opt = TestOptions().parse()

# set dataloader
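# test-time transforms: resize and normalization with ImageNet mean/std, no augmentation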

transformList = [
    Resize(opt.loadSize),
    ToTensor(),
    Normalize([.485, .456, .406], [.229, .224, .225])
]

transform = Compose(transformList)

dataset = createDataset(opt.dataset, transform=transform, outputFile=True)

dataLoader = torch.utils.data.DataLoader(dataset,
                                         batch_size=opt.batchSize,
                                         shuffle=False)

# set model

model = createModel(opt)
model.setup(opt)
model.eval()

# set visualizer
visualizer = TestVisualizer(opt, dataset)

# set evaluator
Example #13
import dataset
import readability
import information
import textProp
import numpy as np
from sklearn.svm import SVC

features_train = []
labels_train = []

dataset.createDataset()
data = dataset.data
data = sorted(data, key=lambda k: k['qId'])

count = 0
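# build one feature vector per answer: entropy of a character-level Markov model,
# relevancy to the question, unique-word and non-stopword measures, and subjectivity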
for row in data:
    featureVector = []
    ts = readability.TextStatistics()
    tp = textProp.TextProperties()
    inf = information.Informativity()
    totalEntropy = 0
    model, stats = inf.markov_model(inf.chars(row['answerBody']), 3)
    for prefix in stats:
        totalEntropy = totalEntropy + inf.entropy(stats, stats[prefix])
    featureVector.append(abs(totalEntropy))  # information
    featureVector.append(
        tp.relevancy(row['answerBody'], row['qBody'],
                     row['qTags']))  # relevancy
    featureVector.append(tp.UniqueWords(row['answerBody']))  # unique
    featureVector.append(tp.NonstopWords(row['answerBody']))  # nonstop
    featureVector.append(tp.subjective(row['answerBody']))  # subjectivity