def main(argv):

    modelSpecs = config.InitializeModelSpecs()
    modelSpecs = ParseCommandLine.ParseArguments(argv, modelSpecs)

    ## load the datasets. Data is a list of proteins and each protein is represented as a dict()
    Data = DataProcessor.LoadDistanceLabelMatrices(modelSpecs['dataset'],
                                                   modelSpecs=modelSpecs)
    print '#proteins loaded from the dataset: ', len(Data)
    allProteins = [d['name'] for d in Data]

    print 'Preparing batch data for training...'
    groupSize = modelSpecs['minibatchSize']
    batches = DataProcessor.SplitData2Batches(data=Data,
                                              numDataPoints=groupSize,
                                              modelSpecs=modelSpecs)
    print "#batches:", len(batches)

    ## add code here to calculate empirical reference state
    ## RefState is a dict, RefState[response] = (length-independent ref, length-dependent ref)
    ## length-independent ref is a 1d array, length-dependent ref is a list with each element being a tuple (length, 1d array)
    RefState = CalcRefState(batches=batches, modelSpecs=modelSpecs)
    RefState['dataset'] = modelSpecs['dataset']
    RefState['proteins'] = allProteins

    ## save RefState
    responseStr = '-'.join(modelSpecs['responses'])
    file4save = 'EmpRefState-' + responseStr + '-' + str(os.getpid()) + '.pkl'
    fh = open(file4save, 'wb')
    cPickle.dump(RefState, fh, protocol=cPickle.HIGHEST_PROTOCOL)
    fh.close()

    ## print the length-independent reference state
    for response in modelSpecs['responses']:
        print RefState[response][0]
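
## A hedged sketch (not part of the original script) of how the pickle written above
## could be inspected, assuming the RefState layout described in the comments:
## RefState[response] = (length-independent ref, length-dependent ref), plus the
## 'dataset' and 'proteins' entries added before saving. The file name is illustrative.
import cPickle

fh = open('EmpRefState-CbCb_Discrete14C-12345.pkl', 'rb')
RefState = cPickle.load(fh)
fh.close()

print '#proteins used for the reference state: ', len(RefState['proteins'])
for response, refs in RefState.iteritems():
    if response in ('dataset', 'proteins'):
        continue
    lengthIndepRef, lengthDepRef = refs
    print response, ' length-independent ref: ', lengthIndepRef
    for seqLen, ref in lengthDepRef:
        print response, ' ref for length ', seqLen, ': ', ref
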
Example #2
def main():
    # learn embeddings
    word2vec.word2vec()
    # convert training,test and eval data into np arrays
    DataProcessor.build_data()
    # this calculates sentiments for the data
    lstm.lstm_script()
Example #3
def get_accuracy_windowed(classifier, parameters, length, slide, arms, wrists,
                          detections):
    #Assemble training data
    armIMU = arms[0]
    wristIMU = wrists[0]
    detection = detections[0]
    for i in range(1, 5):
        armIMU = np.concatenate((armIMU, arms[i]), axis=0)
        wristIMU = np.concatenate((wristIMU, wrists[i]), axis=0)
        detection = np.concatenate((detection, detections[i]), axis=0)

    train_arm, train_wrist, train_detect = dp.apply_window(
        length, slide, armIMU, wristIMU, detection)
    print("Window applied to training data")

    #Assemble validation data
    arm, wrist, time, detect = dr.get_data(6)
    detect = detect.astype(int)
    detect = detect.ravel()
    val_arm, val_wrist, val_detect = dp.apply_window(length, slide, arm, wrist,
                                                     detect)
    print("Window applied to validation data")

    train_arm_wrist = np.concatenate((train_arm, train_wrist), axis=1)
    val_arm_wrist = np.concatenate((val_arm, val_wrist), axis=1)

    accuracies = perform_classification(classifier, parameters, slide,
                                        train_arm_wrist, train_detect,
                                        val_arm_wrist, val_detect)

    return accuracies
Example #4
def CalcFeatureExpectBySampling(metaData, modelSpecs):
    seqfeatures = []
    seqweights = []

    matrixfeatures = []
    matrixweights = []

    embedfeatures = []
    embedweights = []

    dataLocation = DataProcessor.SampleProteinInfo(metaData)
    for loc in dataLocation:
        d = DataProcessor.LoadRealData(loc, modelSpecs, loadLabel=False)
        res = CalcFeatureExpect4OneProtein(d)
        seqfeature, seqweight, matrixfeature, matrixweight = res[:4]
        seqfeatures.append(seqfeature)
        matrixfeatures.append(matrixfeature)
        seqweights.append(seqweight)
        matrixweights.append(matrixweight)

        if len(res) == 6:
            embedfeature, embedweight = res[4:]
            embedfeatures.append(embedfeature)
            embedweights.append(embedweight)

    modelSpecs['seqFeatures_expected'] = np.average(seqfeatures,
                                                    axis=0,
                                                    weights=seqweights)
    modelSpecs['matrixFeatures_expected'] = np.average(matrixfeatures,
                                                       axis=0,
                                                       weights=matrixweights)
    modelSpecs['embedFeatures_expected'] = np.average(embedfeatures,
                                                      axis=0,
                                                      weights=embedweights)
def TrainDataLoader3(sharedQ, sharedLabelPool, sharedLabelWeightPool, stopTrainDataLoader, trainMetaData, modelSpecs, assembleData=True, UseSharedMemory=False):
	#print 'trainDataLoader has event: ', stopTrainDataLoader

	## here we use labelPool to cache the labels of all the training proteins
	## one protein may have multiple sets of input features due to MSA sampling or sequence-template alignment,
	## but it can only have one set of label matrices, so it is worthwhile to save all label matrices in RAM
	labelPool = dict()
	labelWeightPool = dict()

	## load the labels of all training proteins
	trainDataLocation = DataProcessor.SampleProteinInfo(trainMetaData)
	for loc in trainDataLocation:
		d = DataProcessor.LoadRealData(loc, modelSpecs, loadFeature=False, returnMode='list')
		name = d['name']
		labelPool[name] = d['atomLabelMatrix']
		labelWeightMatrix = LabelUtils.CalcLabelWeightMatrix(LabelMatrix=d['atomLabelMatrix'], modelSpecs=modelSpecs, floatType=np.float16)
		labelWeightPool[name] = labelWeightMatrix

	print 'TrainDataLoader with #PID ', os.getpid(), ' has loaded ', len(labelPool), ' label matrices and ', len(labelWeightPool), ' label weight matrices'
	## update labelPool and labelWeightPool to the shared dict()
	sharedLabelPool.update(labelPool)
	sharedLabelWeightPool.update(labelWeightPool)
	print 'TrainDataLoader with #PID ', os.getpid(), ' has updated the shared labelPool and labelWeightPool'

	while True:
		if stopTrainDataLoader.is_set() or os.getppid()==1:
			print 'trainDataLoader receives the stop signal'
			break

		trainDataLocation = DataProcessor.SampleProteinInfo(trainMetaData)
		numOriginals = len(trainDataLocation)
		"""
		maxLen = 900
		trainDataLocation, numExcluded = DataProcessor.FilterByLength(trainDataLocation, maxLen)
		print 'Exclude ', numExcluded, ' train proteins longer than ', maxLen, ' AAs'
		"""
		trainSeqData = DataProcessor.SplitData2Batches(trainDataLocation, numDataPoints=modelSpecs['minibatchSize'], modelSpecs=modelSpecs)
		random.shuffle(trainSeqData)
		for batch in trainSeqData:
			if stopTrainDataLoader.is_set() or os.getppid()==1:
				print 'trainDataLoader receives the stop signal'
				break

			names = [ p['name'] for p in batch ]
			data = []
			for protein in batch:
				d = DataProcessor.LoadRealData(protein, modelSpecs, loadLabel=False, returnMode='list')
				data.append(d)

			FeatureUtils.CheckModelNDataConsistency(modelSpecs, data)
			if assembleData:
				data = PrepareInput4Train(data, modelSpecs, floatType=np.float16, UseSharedMemory=UseSharedMemory)
			#print 'putting data to trainDataLoader queue...'
			sharedQ.put( (data, names) )

	print 'TrainDataLoader has finished loading data'
	sharedQ.close()
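
## A minimal consumer sketch (an assumption, not part of the original code): a training
## worker could drain the queue filled by TrainDataLoader3 and look up the cached label
## and label-weight matrices by protein name in the shared pools populated above.
def FetchOneTrainBatch(sharedQ, sharedLabelPool, sharedLabelWeightPool):
	data, names = sharedQ.get()
	labels = [ sharedLabelPool[name] for name in names ]
	labelWeights = [ sharedLabelWeightPool[name] for name in names ]
	return data, labels, labelWeights
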
def TrainByOneBatch(batch, train, modelSpecs, forRefState=False):

    ## batch is a list of protein locations, so we need to load the real data here
    minibatch = DataProcessor.LoadRealData(batch, modelSpecs)

    ## add code here to make sure that the data has the same input dimension as the model specification
    FeatureUtils.CheckModelNDataConsistency(modelSpecs, minibatch)

    onebatch, names4onebatch = DataProcessor.AssembleOneBatch(
        minibatch, modelSpecs, forRefState=forRefState)
    x1d, x2d, x1dmask, x2dmask = onebatch[0:4]

    ## crop a large protein to deal with limited GPU memory. For sequential and embedding features, the Theano model itself will crop based upon the bounding box
    bounds = SampleBoundingBox((x2d.shape[1], x2d.shape[2]),
                               modelSpecs['maxbatchSize'])

    #x1d_new = x1d[:, bounds[1]:bounds[3], :]
    x1d_new = x1d
    x2d_new = x2d[:, bounds[0]:bounds[2], bounds[1]:bounds[3], :]
    #x1dmask_new = x1dmask[:, bounds[1]:x1dmask.shape[1] ]
    x1dmask_new = x1dmask
    x2dmask_new = x2dmask[:, bounds[0]:x2dmask.shape[1], bounds[1]:bounds[3]]

    input = [x1d_new, x2d_new, x1dmask_new, x2dmask_new]

    ## if embedding is used
    ##if any( k in modelSpecs['seq2matrixMode'] for k in ('SeqOnly', 'Seq+SS') ):
    if config.EmbeddingUsed(modelSpecs):
        embed = onebatch[4]
        #embed_new = embed[:, bounds[1]:bounds[3], : ]
        embed_new = embed
        input.append(embed_new)

        remainings = onebatch[5:]
    else:
        remainings = onebatch[4:]

    ## crop the ground truth and weight matrices
    for x2d0 in remainings:
        if len(x2d0.shape) == 3:
            input.append(x2d0[:, bounds[0]:bounds[2], bounds[1]:bounds[3]])
        else:
            input.append(x2d0[:, bounds[0]:bounds[2], bounds[1]:bounds[3], :])

    ## add bounding box to the input list
    input.append(bounds)

    if config.TrainByRefLoss(modelSpecs):
        if forRefState:
            input.append(np.int32(-1))
        else:
            input.append(np.int32(1))

    train_loss, train_errors, param_L2 = train(*input)

    return train_loss, train_errors, param_L2
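
## A hedged illustration (not the original SampleBoundingBox) of how a square bounding
## box of the form (top, left, bottom, right) could be sampled so that the cropped
## pairwise submatrix respects a size budget; the name and logic here are assumptions.
import random

def SampleSquareBox(shape, maxNumResidues):
    nRows, nCols = shape
    size = min(nRows, nCols, maxNumResidues)
    top = random.randint(0, nRows - size)
    left = random.randint(0, nCols - size)
    return (top, left, top + size, left + size)
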
Example #7
def icir(factor, r, n=20, rank=False):
    if rank:
        x1 = DP.standardize(rankdata(factor))
    else:
        x1 = DP.standardize(factor)
    x2 = DP.standardize(r)
    ic = (x1 * x2).mean(1).fillna(0)
    ir = ic.rolling(20).mean() / ic.rolling(20).std()

    return ic, ir
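
## A self-contained toy sketch of the IC/IR computation above, assuming DP.standardize
## is a cross-sectional z-score per date; the random data and the helper below are
## illustrative, not the original DataProcessor implementation.
import numpy as np
import pandas as pd

def standardize_sketch(df):
    return df.sub(df.mean(axis=1), axis=0).div(df.std(axis=1), axis=0)

factor = pd.DataFrame(np.random.randn(250, 100))   # dates x assets factor exposures
r = pd.DataFrame(np.random.randn(250, 100))        # matching forward returns
ic = (standardize_sketch(factor) * standardize_sketch(r)).mean(axis=1).fillna(0)
ir = ic.rolling(20).mean() / ic.rolling(20).std()  # 20-period information ratio of the IC
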
def DetermineFeatureDimensionBySampling(metaData, modelSpecs):

	protein = DataProcessor.SampleProteinInfo(metaData, numSamples=1)[0]
	d = DataProcessor.LoadRealData(protein, modelSpecs, loadLabel=False, returnMode='list')

	## obtain the dimension of each type of input feature
	modelSpecs['n_in_seq'] = DetermineNumSeqFeatures(d['seqFeatures'])
	modelSpecs['n_in_matrix'] = DetermineNumMatrixFeatures(d['matrixFeatures']) + DetermineNumMatrixFeatures(d['matrixFeatures_nomean'])

	if d.has_key('embedFeatures'):
		modelSpecs['n_in_embed'] = d['embedFeatures'].shape[1]
def CalcLabelDistributionNWeightBySampling(trainMetaData, modelSpecs):
    trainDataLocation = DataProcessor.SampleProteinInfo(trainMetaData,
                                                        numSamples=10000)

    ## load only the ground truth, not the input features, to save memory and speed things up
    labelData = []
    for loc in trainDataLocation:
        p = DataProcessor.LoadRealData(loc, modelSpecs, loadFeature=False)
        labelData.append(p)

    CalcLabelDistributionAndWeight(labelData, modelSpecs)
Example #10
def main():
    q = input("enter a query to be processed> ")
    while not q:
        q = input("no empty queries please> ")

    dp = DataProcessor()
    # list_doc = dp.process_texts(sys.argv[1:])


    reuters_texts = []
    #Working with the first 200 files from the reuters corpus
    reuters_data = reuters.fileids()[:200]
    for data in reuters_data:
        file_str = "" #concatinate file to string
        file = reuters.open(data)
        for line in file:
            file_str = file_str + line
        file_str = file_str.replace('\n','')
        file_str = file_str.replace("  "," ")
        file_str = file_str.replace("   ", " ")
        reuters_texts.append(file_str)

    # for text in reuters_texts:
    #     print(str(text)+"\n")
    # #print(reuters_texts) # used for debugging purposes

    # inverted_index returns the document frequency and per-document term frequencies,
    # both of which are required for computing the tf-idf weights below
    [document_frequency, term_frequency_document] = dp.inverted_index(reuters_texts)
    term_weights = dp.compute_weights(term_frequency_document,reuters_texts)
    # print the term weights
    # for term,weights in term_weights.items():
    #     print(term," ",weights)

    print("document_frequency: ", document_frequency)
    [total_collection, total_distinct_terms] = dp.get_collection_lengths(reuters_texts)
    [similarity,sorted_doc_list] = dp.bm25(reuters_texts,document_frequency,term_frequency_document,q)
    document_lengths = dp.get_doc_length(reuters_texts)
    query_likelyhood_scores = dp.query_likelyhood(reuters_texts,document_lengths,total_collection,total_distinct_terms,.5)
    modded_query_vector = dp.rocchioAlgorithm(reuters_texts,term_weights,q,1,1,1)
    precision_score = dp.precision(q,reuters_texts)


    #output statements
    #print("total_collection: ",total_collection)
    #print("document lengths: " ,document_lengths)
    print("Query: ",q)
    print("using bm25 smoothing: ", similarity)
    #print("sorted_doc_list: ",sorted_doc_list)
    print("query_likelyhood_scores: ",query_likelyhood_scores)
    print("modded_query_vector taken from Rocchios algorithm: ",modded_query_vector)
    print("precision score from precision function for the query " + q + ": ", precision_score)
def ValidDataLoader2(sharedQ, stopValidDataLoader, validSeqData, modelSpecs, assembleData=True, UseSharedMemory=False):

	bUseCCMFnorm, bUseCCMsum, bUseCCMraw, bUseFullMI, bUseFullCov = config.ParseExtraCCMmode(modelSpecs)
	if any([bUseCCMraw, bUseFullMI, bUseFullCov]):
		## when full coevolution matrices are used, we shall use float16 to save memory
		floatType = np.float16
	else:
		floatType = theano.config.floatX

	#print 'validDataLoader has event: ', stopValidDataLoader
	for batch in validSeqData:
		if stopValidDataLoader.is_set() or os.getppid()==1:
			#print 'validDataLoader receives the stop signal'
			break

		## Load real data for one batch
		data = DataProcessor.LoadRealData(batch, modelSpecs, returnMode='list')

		## add code here to make sure that the data has the same input dimension as the model specification
		FeatureUtils.CheckModelNDataConsistency(modelSpecs, data)

		if assembleData:
			data = PrepareInput4Validate(data, modelSpecs, floatType=floatType, UseSharedMemory=UseSharedMemory)
		#print 'putting data to validDataLoader queue...'
		sharedQ.put(data)

	print 'validDataLoader has finished loading data'
	sharedQ.close()
Example #12
 def ReplaceMissing(self,df: pd.DataFrame) -> pd.DataFrame:
     #length = 3
     #Create a DataProcessor object to convert the data in the csv and replace all missing attributes
     Dp = DataProcessor.DataProcessor()
     #Start the process to change the integrity of the dataframe from within the data processor
     data = Dp.ReplaceMissingValue(df) 
     return data 
Example #13
def get_batch(batch_size):
    batch = []
    while (len(batch) < batch_size):
        for op in operators:  # for each operator...
            data = []

            img, inclasses = next(
                iter(dataloaders[op]
                     ))  # Gather a set of images and classes from them

            for i in range(len(img)):

                np = DataProcessor.get_neg_pairs([op, objects[inclasses[i]]],
                                                 operators, objects)

                ## Image, Object, Operator, nObject, nOperator
                data.append([
                    Variable(feat_extractor(img[i].unsqueeze_(0))),
                    int(inclasses[i]),
                    int(operators.index(op)), np
                ])

                if (len(data) == batch_size):
                    break

            batch = batch + data

    return [batch[i] for i in (random.sample(range(len(batch)), batch_size))]
Example #14
def train_model_lstm(window_size=200, step_size=150):
    data = dp.get_dataset_windowed(window_size, step_size)

    model = create_baseline_lstm(data[0][0].shape)

    history = model.fit(data[0],
                        data[1],
                        batch_size=64,
                        epochs=100,
                        validation_split=0.2,
                        verbose=0,
                        callbacks=callbacks)

    model.save("model_lstm.h5")

    file = open("results.txt", "a")
    file.writelines([
        'RNN results: \n', 'Topology: [LSTM(200),Dense(10)] \n',
        'Accuracy: {} \n'.format(history.history['accuracy'][-1]),
        'Val accuracy: {} \n'.format(history.history['val_accuracy'][-1]),
        'Loss: {} \n'.format(history.history['loss'][-1]),
        'Val loss: {} \n'.format(history.history['val_loss'][-1]), '\n'
    ])
    file.close()

    return model
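
# A hypothetical sketch of what create_baseline_lstm could look like, kept consistent
# with the topology string written to results.txt ([LSTM(200), Dense(10)]); the loss,
# optimizer and output activation are assumptions, not the original implementation.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

def create_baseline_lstm_sketch(input_shape):
    model = Sequential([
        LSTM(200, input_shape=input_shape),      # one recurrent layer over the window
        Dense(10, activation='softmax'),         # class probabilities
    ])
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
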
Example #15
def train_model_dnn():
    data = dp.get_dataset_flat()

    model = create_baseline_dnn()

    history = model.fit(data[0],
                        data[1],
                        batch_size=64,
                        epochs=100,
                        validation_split=0.2,
                        verbose=0,
                        callbacks=callbacks)

    model.save("model_dnn.h5")

    file = open("results.txt", 'a')
    file.writelines([
        'DNN results: \n',
        'Accuracy: {} \n'.format(history.history['accuracy'][-1]),
        'Val accuracy: {} \n'.format(history.history['val_accuracy'][-1]),
        'Loss: {} \n'.format(history.history['loss'][-1]),
        'Val loss: {} \n'.format(history.history['val_loss'][-1]), '\n'
    ])
    file.close()

    return model
Example #16
def compute_confusion_matrix(model):
    data = dp.get_dataset_windowed(200, 150)
    y_pred = model.predict(data[0])

    cm = confusion_matrix(data[1].argmax(axis=1), y_pred.argmax(axis=1))
    print(cm)
    plt.matshow(cm, cmap='Blues')
    plt.title("Confusion Matrix")
    plt.ylabel("True label")
    plt.xlabel("Predicted label")

    classes = ["Walk", "Tölt", "Trot", "Canter"]
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes)
    plt.yticks(tick_marks, classes)

    thresh = cm.max() / 2
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # if normalize:
        #     plt.text(j, i, "{:0.4f}".format(cm[i, j]),
        #              horizontalalignment="center",
        #              color="white" if cm[i, j] > thresh else "black")
        # else:
        plt.text(j,
                 i,
                 "{:,}".format(cm[i, j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.show()

    res = model.evaluate(data[0], data[1])

    print(res)
Example #17
def init(inFile, outFile):
    global MPIFX3, MPIHandler, MPIProcessor
    global dev, handler, processor
    MPIFX3 = mp.Queue()
    MPIHandler = mp.Queue()
    MPIProcessor = mp.Queue()

    dev = FX3.Emulator(MPIFX3, inFile)
    pipe = dev.getPipe()
    buffSize = dev.getBufferSize()

    handler = DataHandler.DataHandler(MPIHandler,
                                      pipe,
                                      buffSize,
                                      filename=outFile)
    realtime = handler.getRealtimeQueue()
    handler.enableRealtime()

    processor = DataProcessor.DataProcessor(MPIProcessor,
                                            realtime, [[0, 2], [3, 3]],
                                            legacy=False,
                                            fs=2.5E6,
                                            bufferSize=buffSize,
                                            calcFlow=True,
                                            numProcessors=2)
Example #18
 def Predict(self, newdata):
     processed = []
     processed.append(DataProcessor.Process(newdata[1]))
     tfdata = self.tfidfconverter.Transform(processed)
     pred = self.clf.predict(tfdata)
     print(newdata, " ", pred)
     print(" ")
def calc(datasetIndex, multiplierInt):
    csv = pd.DataFrame(columns=['dataset', 'bins', 'f1', 'zero-one'])
    exp = ((multiplierInt + 1) / 2)
    bins = math.ceil(2**exp)
    results = []
    for k in range(trials):
        dp = DataProcessor.DataProcessor(bin_count=bins)
        binnedDataset = dp.StartProcess(datasets[datasetIndex])
        N, Q, F, testData = train(binnedDataset)

        model = Classifier.Classifier(N, Q, F)
        classifiedData = model.classify(testData)

        stats = Results.Results()
        zeroOne = stats.ZeroOneLoss(classifiedData)
        macroF1Average = stats.statsSummary(classifiedData)
        datapoint = {
            'dataset': dataset_names[datasetIndex],
            'bins': bins,
            'f1': macroF1Average,
            'zero-one': zeroOne / 100
        }
        print(datapoint)
        csv = csv.append(datapoint, ignore_index=True)
        # trial = {"zeroOne": zeroOne, "F1": macroF1Average}
        # results.append(trial)
        # print(trial)
    data.append(csv)
Example #20
def generate_windowed_data(length, slide, fs):
    train_arm, train_wrist, train_detect = dr.get_train_data()
    train_arm = np.vstack(train_arm)
    train_wrist = np.vstack(train_wrist)
    train_detect = np.hstack(train_detect)

    _, _, train_detect_window = dp.apply_window(length, slide, train_arm,
                                                train_wrist, train_detect)
    train_arm_window = da.apply_window_features(train_arm, length, slide, fs)
    train_wrist_window = da.apply_window_features(train_wrist, length, slide,
                                                  fs)

    print("Training: Full Arm Shape: " + str(np.shape(train_arm)))
    print("Training: Full Wrist Shape: " + str(np.shape(train_wrist)))
    print("Training: Full Detection Shape: " + str(np.shape(train_detect)))
    print("Training: Windowed Arm Shape: " + str(np.shape(train_arm_window)))
    print("Training: Windowed Wrist Shape: " +
          str(np.shape(train_wrist_window)))
    print("Training: Windowed Detect Shape: " +
          str(np.shape(train_detect_window)))

    val_arm, val_wrist, val_detect = dr.get_val_data()
    val_arm = np.vstack(val_arm)
    val_wrist = np.vstack(val_wrist)
    val_detect = np.hstack(val_detect)

    _, _, val_detect_window = dp.apply_window(length, slide, val_arm,
                                              val_wrist, val_detect)
    val_arm_window = da.apply_window_features(val_arm, length, slide, fs)
    val_wrist_window = da.apply_window_features(val_wrist, length, slide, fs)

    print("Validation: Windowed Arm Shape: " + str(np.shape(val_arm_window)))
    print("Validation: Windowed Wrist Shape: " +
          str(np.shape(val_wrist_window)))
    print("Validation: Windowed Detect Shape: " +
          str(np.shape(val_detect_window)))

    data = {
        "val_arm": val_arm_window,
        "val_wrist": val_wrist_window,
        "val_detect": val_detect_window,
        "train_arm": train_arm_window,
        "train_wrist": train_wrist_window,
        "train_detect": train_detect_window
    }

    return data
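
# A hedged sketch (not the original dp.apply_window) of cutting fixed-length sliding
# windows with a given slide from synchronized arm/wrist/detection streams; taking the
# rounded mean of the labels in each window as its label is an assumption.
import numpy as np

def apply_window_sketch(length, slide, arm, wrist, detect):
    arm_windows, wrist_windows, detect_windows = [], [], []
    for start in range(0, len(detect) - length + 1, slide):
        arm_windows.append(arm[start:start + length])
        wrist_windows.append(wrist[start:start + length])
        detect_windows.append(int(round(np.mean(detect[start:start + length]))))
    return np.array(arm_windows), np.array(wrist_windows), np.array(detect_windows)
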
Example #21
 def del_inventory(inventory_id, update_controls=[None]):
     ip = dp.InventoryProcessor(database)
     sql = ip.build_del_code(inventory_id=inventory_id)
     ip.execute_sql_code(sql)
     ip.db_con.commit()
     ip.db_con.close()
     if update_controls is not None:
         IOProcessor.sel_inventory(update_controls[0])
Example #22
 def del_product(product_id, update_controls=[None]):
     pp = DP.ProductsProcessor('Python210FinalDB.db')
     sql = pp.build_del_code(product_id=product_id)
     pp.execute_sql_code(sql)
     pp.db_con.commit()
     pp.db_con.close()
     if update_controls is not None:
         IOProcessor.sel_product(update_controls[0])
Example #23
 def del_product(product_id, update_controls=[None]):
     pp = dp.ProductProcessor(database)
     sql = pp.build_del_code(product_id=product_id)
     pp.execute_sql_code(sql)
     pp.db_con.commit()
     pp.db_con.close()
     if update_controls is not None:
         IOProcessor.sel_product(update_controls[0])
Example #24
 def del_inventory_counts(inventory_id, update_controls=[None]):
     pp = DP.InventoryCountProcessor('Python210FinalDB.db')
     sql = pp.build_del_code(inventory_id=inventory_id)
     pp.execute_sql_code(sql)
     pp.db_con.commit()
     pp.db_con.close()
     if update_controls is not None:
         IOProcessor.sel_inventorycounts(update_controls[0])
Example #25
def RunIVSweep(sweepSpec):
    KE2010.Initialize()
    KE237.Initialize()
    KE237.TurnOutputOn()
    presentV = sweepSpec.startV
    presentI = 0
    VIList = []
    while (presentV < sweepSpec.stopV):
        print('presentV: ' + str(presentV))
        KE237.SetVoltage(presentV)
        presentI = KE2010.MeasureCurrent()
        print('presentI: ' + str(presentI))
        VIList.append((presentV, presentI))
        presentV = presentV + sweepSpec.stepV
    KE237.TurnOutputOff()
    KE2010.GPIBReset()
    dp.SaveListToCsv(VIList, name=HCPName)
    dp.FitLineFromCsv(name=HCPName)
Example #26
def SaveDataToFile():
    try:
        objF = DataProcessor.File()
        objF.FileName = "EmployeeData.txt"
        objF.TextData = Employees.EmployeeList.ToString()
        print("Reached here")
        objF.SaveData()
    except Exception as e:
        print(e)
Example #27
def SaveDataToFile():
    try:
        objF = DataProcessor.File()
        objF.FileName = "CustomerData.txt"
        objF.TextData = Customers.CustomerList.ToString()
        print("Reached here")
        objF.SaveData()
    except Exception as e:
        print(e)
Example #28
def get_attention_weights(data, embds):
    tf.reset_default_graph()

    it = 0

    now = "han_100d_163b_50cx_0.0001_0.5d"
    with tf.Session() as sess:
        model = HierarchicalAttention(
            num_classes=2,
            vocab_size=embds.shape[0],
            embedding_size=embds.shape[1]
        )
    root_logdir = "logs"
    logdir = "{}/run-{}-{}/".format(root_logdir, now, it)

    checkpoint_dir = "{}checkpoints".format(logdir)
    saver = tf.train.Saver()
    # saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_dir))
    # Training model
    # training_op, global_step = model.optimize()
    sess.run(tf.global_variables_initializer())
    sess.run(model.embedding_init, feed_dict={model.embedding_placeholder: embds})
    saver.restore(sess, checkpoint_dir)
    predictions = model.predict()

    # print("Evaluation:")
    x_val, y_val, sent_lengths_val, seq_lengths_val = data.fetch_val()
    feed_dict = {model.x: x_val, model.y: y_val, model.sent_lengths: sent_lengths_val,
                 model.seq_lengths: seq_lengths_val, model.dropout_keep_prob: 1,
                 model.max_seq_length: data.val_max_seq_length,
                 model.max_sent_length: data.val_max_sent_length
                 }
    pred, a_word, a_sent = sess.run([predictions, model.alphas_word, model.alphas_sent], feed_dict=feed_dict)
    #pred, a1, A = sess.run([predictions, model.alphas1, model.alphas2, model.alphas3, model.alphas4],
                                    #feed_dict=feed_dict)
    a_word = np.reshape(a_word, [-1, data.val_max_seq_length, data.val_max_sent_length, 1])

    # filter on correct predictions
    zipped = list(zip(x_val, pred['labels'], pred['predictions'], pred['probabilities'], a_word, a_sent))
    # print(zipped[0:2])
    selection = [list(x) for x in zipped][133]
    zipped_correct = [list(x) for x in zipped if x[1]==x[2] and x[1] == 1]
    # print(zipped_correct[0:2])

    def get_predicted_prob(x):
        return (x[3])[(x[2])]

    sorted_correct = sorted(zipped_correct, key=get_predicted_prob, reverse=True)
    print(sorted_correct[0:2])

    #selection = sorted_correct[1]
    selection_zipped_tuple = list(zip(selection[0], selection[4], selection[5]))
    #selection_zipped_tuple = list(zip(selection[0], selection[4]))
    selection_zipped = [list(x) for x in selection_zipped_tuple]
    for s in selection_zipped:
        s[0] = dp.translate_to_voc(s[0])
    return selection_zipped
Example #29
 def upd_inventory(inventory_id, inventory_date, update_controls=[None]):
     pp = DP.InventoryProcessor('Python210FinalDB.db')
     sql = pp.build_upd_code(inventory_id=inventory_id,
                             inventory_date=inventory_date)
     pp.execute_sql_code(sql)
     pp.db_con.commit()
     pp.db_con.close()
     if update_controls is not None:
         IOProcessor.sel_inventory(update_controls[0])
Example #30
def SaveDataToFile():
    #Save data to file
    try:
        objF = DataProcessor.File()
        objF.FileName = "CustomerData.txt"
        objF.TextData = Customers.CustomerList.ToString()
        objF.SaveData()
    except Exception as e:
        print(e)
import tables as tab
import numpy as np
import DataProcessor as dp

datafile = tab.openFile('InstrumentedBicycleData.h5')
datatable = datafile.root.data.datatable

nanList = []

for x in datatable.iterrows():
    cell = x['AccelerationX']
    vnSampRate = x['NINumSamples']
    vnSig = dp.unsize_vector(cell, vnSampRate)
    numNan = np.sum(np.isnan(vnSig))
    if numNan > 2:
        nanList.append((x['RunID'], numNan))

nanList.sort(key=lambda x: x[1])
for thing in nanList:
    print thing
import numpy as np
import cv2
import pickle
import DataProcessor
import EmotionLearner
from sklearn import svm
import matplotlib.pyplot as plt

dataProcessor = DataProcessor.DataProcessor()
data, label = dataProcessor.loadCKData_With_Hog_32x32_3x3()
data, featureMeans, featureVariance = dataProcessor.normalizeData(data)

emotionLearner = EmotionLearner.EmotionLearner()
scores = emotionLearner.crossValidateSVM(data, label)
print scores
print np.mean(scores)
clf = emotionLearner.trainSVM(data, label)

#save data and svm
with open('classifier/trainingDataCK.pkl', 'wb') as output:
    pickle.dump(data, output, pickle.HIGHEST_PROTOCOL)
    pickle.dump(label, output, pickle.HIGHEST_PROTOCOL)
    pickle.dump(featureMeans, output, pickle.HIGHEST_PROTOCOL)
    pickle.dump(featureVariance, output, pickle.HIGHEST_PROTOCOL)

with open('classifier/svmCK.pkl', 'wb') as output:
    pickle.dump(clf, output, pickle.HIGHEST_PROTOCOL)
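
# Loading counterpart (a straightforward sketch): objects must be read back in the same
# order they were dumped above (data, label, featureMeans, featureVariance).
with open('classifier/trainingDataCK.pkl', 'rb') as infile:
    data = pickle.load(infile)
    label = pickle.load(infile)
    featureMeans = pickle.load(infile)
    featureVariance = pickle.load(infile)

with open('classifier/svmCK.pkl', 'rb') as infile:
    clf = pickle.load(infile)
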
Example #33
        # for index, data_frame in zip(range(len(data_frames)), data_frames):
        #     plot([d.daily_return(data_frame)], 'Daily Return', market_name[index].upper()+' Index')

        # print(data_frames[0].head(5))

        #d.plot_data([data_frames[0],data_frames[7]], ['Daily Return'], market_names=market_name)
        #plt.show()

        # Plot data
        # data_frames[0]['Daily Return'].plot()

        #print(data_frames[0].index.name)
        #print(data_frames[0].columns.values.tolist())

        dp = DataProcessor()

        data_points = [4, 8, 12]

        # Compute moving average
        for data_frame in data_frames:
            data_frame = dp.get_moving_average(data_frame, data_points)

        #Compute exponential moving average
        for data_frame in data_frames:
            data_frame = dp.get_ewma(data_frame, data_points)

        # cols=['Adj Close','MA_5','MA_10','MA_15','MA_20']
        # plot different calculations

        '''
Example #34
import tables as tab
import matplotlib.pyplot as plt
from scipy import stats
from scipy.interpolate import UnivariateSpline
import DataProcessor as dp

# pick a run number
runid = 124
print "RunID:", runid

# open the data file
datafile = tab.openFile('InstrumentedBicycleData.h5')

datatable = datafile.root.data.datatable

# get the raw data
niAcc = dp.get_cell(datatable, 'FrameAccelY', runid)
vnAcc = dp.get_cell(datatable, 'AccelerationZ', runid)
sampleRate = dp.get_cell(datatable, 'NISampleRate', runid)
numSamples = dp.get_cell(datatable, 'NINumSamples', runid)
speed = dp.get_cell(datatable, 'Speed', runid)
threeVolts = dp.get_cell(datatable, 'ThreeVolts', runid)

# close the file
datafile.close()

# make a nice time vector
time = dp.time_vector(numSamples, sampleRate)

# scale the NI signal from volts to m/s**2, and switch the sign
niSig = -(niAcc - threeVolts / 2.) / (300. / 1000.) * 9.81
vnSig = vnAcc
import tables as tab
import numpy as np
import matplotlib.pyplot as plt
import DataProcessor as dp

datafile = tab.openFile('InstrumentedBicycleData.h5')
datatable = datafile.root.data.datatable
for x in datatable.iterrows():
    if x['RunID'] == 4:
        pass
    else:
        if x['Maneuver'] != 'System Test':
            numSamp = x['NINumSamples']
            sampleRate = x['NISampleRate']
            time = np.linspace(0., numSamp/sampleRate, num=numSamp)
            acceleration = dp.unsize_vector(x['FrameAccelY'], numSamp)
            print '--------------------'
            print 'Run ID:', x['RunID']
            print 'Speed:', x['Speed']
            print 'Notes:', x['Notes']
            print 'Environment:', x['Environment']
            print 'Maneuver:', x['Maneuver']
            print 'Total time:', time[-1]
            print 'Time of max value:', time[np.argmax(acceleration)]
            print 'Max value:', np.max(acceleration)
            print '--------------------'
            if time[np.argmax(acceleration)] > 5.:
                plt.figure(x['RunID'])
                plt.plot(time, acceleration)
                plt.title(x['Speed'])
        week = datetime.timedelta(days = 7)
        days = Analysis.findHistoricalWeekdays(data, specifiedDate)
        # extends to 1 month behind and ahead of these days
        # grabs [-3, 0, 3], then adds [-4, -1, 2] and [-2, 1, 4]
        days = map(lambda x: x - 3 * week, days) + days + map(lambda x: x + 3 * week, days)
        days = map(lambda x: x - 1 * week, days) + days + map(lambda x: x + 1 * week, days)
        # return only the ones in the dataset
        return [day for day in days if (data.has_key(day))]



if __name__ == "__main__":
    formatter = "{0:.2f}"

    print("Processing file " + sys.argv[1] + " for " + sys.argv[2])
    dataMap = DataProcessor.processFile(sys.argv[1])
    # find a reasonable upper bound for the graphs
    dataMax = 0
    for values in dataMap.values():
        p = np.percentile(values, 90)
        dataMax = max(dataMax, p)

    givenDate = sys.argv[2].split("-")
    givenDate = datetime.date(int(givenDate[0]), int(givenDate[1]), int(givenDate[2]))

    givenTime = None
    if (len(sys.argv) >= 4):
        givenTime = sys.argv[3].split(":")
        givenTime = datetime.time(int(givenTime[0]), int(givenTime[1]), int(givenTime[2]))

    # grabs the filepath
Example #37
# Setting up all the options!
opts_prepro = {'rescale':rescale, 'scaleFactor':scaleFactor, 'colorSpace':colorSpace}
opts_seeds = {'method':seedSelection}
opts_labeller = {'labelsType':labelsType, 'labelThrs':labelThrs, 'colorSpace':colorSpace}

#############################################
# 2. Now it's time to play with the data
#############################################
gt_index = 0
TLabels = []
for im in Images:
	#############################################
	# 2.1. Data preprocessing & transformation
	#############################################
	imprep 	 = dp.processData(im, opts_prepro)
	#############################################
	# 2.2. Color space transformation (from Image to np.ndarray X)
	#############################################
	X = ce.transformColorSpace(imprep, opts_prepro)
	#############################################
	# 2.3. Selection of K (number of clusters)
	#############################################
	#Done already below for K == -1

	#############################################
	# 2.4. Selection of the K seeds --> Seeds
	#############################################
	
	if K == -1:
	  resultList = []
Example #38
            if normalizedQUarterlyReviewCount[i-1][1] == 0:
                quarterDeltas.append([normalizedQUarterlyReviewCount[i][0], 0])
            else:
                quarterDeltas.append([normalizedQUarterlyReviewCount[i][0], (normalizedQUarterlyReviewCount[i][1]-normalizedQUarterlyReviewCount[i-1][1])/normalizedQUarterlyReviewCount[i-1][1]])
    
    return quarterDeltas
    
if __name__ == '__main__':
    ticker = "SBUX";
    bizFullName = "Starbucks"
    conn = mysql.connector.connect(user='******', password='******',
                              host='107.170.18.102',
                              database='FinanceNLP')
    cursor_select = conn.cursor()
    erDates = getERDates(ticker, cursor_select)#read ER dates from my DB    
    dailyReviewCounts = DataProcessor.getDailyReviewsCount(bizFullName)#list of tuple(date, review count)    
    quarterReviewCounts = getQuarterCumulated(erDates, dailyReviewCounts)#review count over quarters, tuple list (ER date, previous quarter review count)    
    quarterReviewCountsList = [[item[0], item[1]] for item in quarterReviewCounts.items()]
    quarterReviewCountsList.sort(key = operator.itemgetter(0), reverse=False)
    normalizedQuarterlyReviewCount = getNormalizedQuarterlyReviewCount(quarterReviewCountsList)#TODO: try another normalization method, or no normalization
    quarterDeltas = getQuarterlyReviewCountDeltas(normalizedQuarterlyReviewCount)#difference between quarters
    # now quarterDeltasList becomes daily average review count in each quarter    
    ER2Surprise = getStockPriceSurprise(ticker, erDates)#hash (date, price change)
#     print ER2Surprise

    lineReviewCount = []
    linePrice = []
    lineER = []
    print "Date\t", "ReviewCount Change\t", "Price Change\n", 
    for er in quarterDeltas:
        dt = er[0]
Example #39
import tensorflow as tf
import numpy as np
import DataProcessor as dp
from matplotlib import pyplot


nodeId = 205
source = "House"
startDate = "2016-01-12"
endDate = "2016-01-12"

feature_columns = [tf.feature_column.numeric_column("x", shape=[9])]

[dataSetTime,dataSetTemp,trainingDataX,trainingDataY,testDataX,testDataY]=dp.getParams(nodeId,source,startDate,endDate)

estimator = tf.estimator.DNNRegressor(
    feature_columns=feature_columns,
    hidden_units=[30, 30, 30],
    optimizer=tf.train.ProximalAdagradOptimizer(
      learning_rate=10,
      l1_regularization_strength=0.001
))
cX = 0
inp = trainingDataX
batchSize = 500
numBatches = len(trainingDataX)//batchSize

def input_fn():

    global cX
    if(cX > len(trainingDataX)):