Example #1
def Writing():

    readData()
    data = readData()

    def getDataFromUser():
        print("Add student")
        name = input("name : ")
        surname = input("surname : ")
        age = int(input("age : "))
        username = input("username : ")
        password = input("password : ")
        students = {
            "name": name,
            "surname": surname,
            "age": age,
            "username": username,
            "password": password
        }
        data['student'].append(students)

    num = int(input("Elave edilecek telebe sayini yazin : "))  # "Enter the number of students to add"

    for i in range(num):
        getDataFromUser()

    with open("database.json", "w") as conn:
        json.dump(data, conn)
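
These CRUD examples rely on a readData() helper that is never shown; the following is a minimal sketch of what it presumably looks like (the FileNotFoundError fallback and the exact JSON layout are assumptions, not the source's code).

import json

def readData():
    # Hypothetical helper assumed by the examples above: load the whole JSON
    # database into memory as a dict holding "student" and "teachers" lists.
    try:
        with open("database.json") as conn:
            return json.load(conn)
    except FileNotFoundError:
        return {"student": [], "teachers": []}
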
Example #2
def trainModel():
    train_data = f.readData("recommend_train_1hot_coding.csv")
    X = train_data[:, 0:-1]
    y = train_data[:, -1]
    clf = SVC(C=137, class_weight='balanced', probability=True)
    clf.fit(X, y)
    return clf
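
A hedged usage sketch for trainModel (not part of the source): f.readData is assumed, as above, to return a NumPy array whose last column is the label, and the test file name below is made up.

clf = trainModel()
test_data = f.readData("recommend_test_1hot_coding.csv")  # hypothetical file name
X_test = test_data[:, 0:-1]
print(clf.predict(X_test))        # predicted labels
print(clf.predict_proba(X_test))  # per-class probabilities (enabled by probability=True above)
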
Example #3
def run_dp_parser(filenames, no_of_sentences, NO_OF_THREADS=5):
    print("Swapnil")
    # sentences=F.load_to_file(filenames.sents_file_name,filenames.output_folder)
    # triplets_dict={}
    # F.count=0
    # path_to_jar = '/home/cs17mtech11004/stanford-parser-full-2018-02-27/stanford-parser.jar'
    # path_to_models_jar = '/home/cs17mtech11004/stanford-parser-full-2018-02-27/stanford-parser-3.9.1-models.jar'
    # dependency_parser = StanfordDependencyParser(path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
    # dependency_parser=""
    # no_of_sent=len(sentences)
    # if no_of_sentences>0:
    # 	no_of_sent=no_of_sentences
    os.system("mkdir -p " + filenames.output_folder + "/dp_data_pos")
    data = F.readData(filenames.corpus_name)
    print("Read Complete", len(data))

    def func(i):
        start = int(i * (len(data) / NO_OF_THREADS))
        end = int(min(((i + 1) * (len(data) / NO_OF_THREADS)), len(data)) - 1)
        print("Thread", i, start, end)
        F.genrate_triplets_full(i, data[start:end], filenames)

    t = [
        threading.Thread(target=func, args=(i, )) for i in range(NO_OF_THREADS)
    ]
    for temp in t:
        temp.start()
    for temp in t:
        temp.join()
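
The per-thread split above computes end = ... - 1 and then slices data[start:end], which appears to leave out the last row of every chunk; a hedged alternative split (a sketch only, assuming the exact ordering across threads does not matter) is shown below.

def even_chunks(seq, n):
    # Hypothetical helper (not in the source): cut seq into n contiguous chunks
    # that together cover every element; the last chunk absorbs any remainder.
    size = len(seq) // n
    return [seq[i * size:(i + 1) * size] if i < n - 1 else seq[i * size:] for i in range(n)]
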
def deleting():
    readData()

    data = readData()

    oldpass = int(input("write password : "))
    # (removal of the matching student record is elided in the source)
    print("Melumat silindi...")  # "The record has been deleted..."

    with open("database.json", "w") as conn:
        json.dump(data, conn)
def updating():
    readData()

    data = readData()

    oldpass = int(input("old password daxil edin : "))  # "enter the old password"
    newpass = int(input("new password : "))
    # (updating the matching student's password is elided in the source)
    print("Melumat deyisdirildi...")  # "The record has been updated..."

    with open("database.json", "w") as conn:
        json.dump(data, conn)
Example #6
def main():
    model = trainModel()
    cur = os.getcwd()
    files = os.listdir(cur + '/recommend')
    for doc in files:
        test_data = f.readData("recommend/" + doc)
        test_data = test_data[:, 0:-1]
        test_score = model.predict_log_proba(test_data)
        np.savetxt(doc[:-4] + "_score.csv", test_score, delimiter=",")
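
A hedged follow-up to the scoring loop above (not in the source): predict_log_proba writes log-probabilities, so the saved CSVs can be mapped back to probabilities with np.exp; the file name below is hypothetical.

import numpy as np

log_scores = np.loadtxt("some_doc_score.csv", delimiter=",")  # hypothetical output of main()
probs = np.exp(log_scores)  # each row then sums to 1 across the classifier's classes
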
def preprocessing(filenames):
    data = ""
    sentences = []
    words = []

    # Find Sentences and save to file
    data = F.readData(filenames.corpus_name)
    import os
    if not os.path.isfile(filenames.output_folder + '/' + filenames.sents_file_name):
        sentences = F.getSentences(data)
        F.save_to_file(filenames.sents_file_name, sentences, filenames.output_folder)
    else:
        print("Sentences File Found")
        sentences = F.load_to_file(filenames.sents_file_name, filenames.output_folder)

    if not os.path.isfile(filenames.output_folder + '/' + filenames.words_file_name):
        words = F.getWords(sentences)
        F.save_to_file(filenames.words_file_name, words, filenames.output_folder)
    else:
        print("Words File Found")
        words = F.load_to_file(filenames.words_file_name, filenames.output_folder)

    print("Length of text data: ", len(data))

    # updated_words, vocab = F.getVocabulary(words, 400,filenames)
    # updated_words, vocab = F.getVocabulary(words, 300,filenames)
    # updated_words, vocab = F.getVocabulary(words, 200,filenames)
    # updated_words, vocab = F.getVocabulary(words, 100,filenames)
    # updated_words, vocab = F.getVocabulary(words, 75,filenames)
    # updated_words, vocab = F.getVocabulary(words, 50,filenames)
    # updated_words, vocab = F.getVocabulary(words, 25,filenames)
    # updated_words, vocab = F.getVocabulary(words, 20,filenames)
    # updated_words, vocab = F.getVocabulary(words, 15,filenames)
    updated_words, vocab = F.getVocabulary(words, 10,filenames)
    # updated_words, vocab = F.getVocabulary(words, 5,filenames)
    # updated_words, vocab = F.getVocabulary(words, 4,filenames)
    # updated_words, vocab = F.getVocabulary(words, 3,filenames)
    # updated_words, vocab = F.getVocabulary(words, 2,filenames)
    # updated_words, vocab = F.getVocabulary(words, 1,filenames)
    # updated_words, vocab = F.getVocabulary(words, 0,filenames)

    F.save_to_file(filenames.vocab_file, vocab, filenames.output_folder)
    F.save_to_file(filenames.updated_words_file_name, updated_words, filenames.output_folder)

    word_to_index = {}
    index_to_word = {}
    for k, v in enumerate(vocab):
        word_to_index[v] = k
        index_to_word[k] = v

    F.save_to_file(filenames.w2i_file, word_to_index, filenames.output_folder)
    F.save_to_file(filenames.i2w_file, index_to_word, filenames.output_folder)
    print(len(sentences), len(words))
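
A hedged round-trip illustration of the two lookup tables built above (the vocabulary and tokens are made up; how the real code treats out-of-vocabulary words is not shown in the source).

vocab = ["good", "bad", "movie"]                    # hypothetical vocabulary
word_to_index = {w: i for i, w in enumerate(vocab)}
index_to_word = {i: w for i, w in enumerate(vocab)}

tokens = ["good", "movie", "unknownword"]
encoded = [word_to_index[w] for w in tokens if w in word_to_index]  # -> [0, 2]
decoded = [index_to_word[i] for i in encoded]                       # -> ['good', 'movie']
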
Example #8
def reading():

    readData()
    data = readData()

    print("""Student information -> 1
Teachers information -> 2""")
    info = int(input("press 1 or 2 : "))

    if info == 1:
        for item in data['student']:
            print(
                f"Name : {item['name']} \nSurname :  {item['surname']} \n"
                f"Age :  {item['age']} \nUsername : {item['username']} \nPassword : {item['password']} \n"
            )
    elif info == 2:
        for item in data['teachers']:
            print(
                f"Name : {item['name']} \nSurname :  {item['surname']} \n"
                f"Username : {item['username']} \nPassword : {item['password']} \n"
            )
Example #9
def main():
    np.random.seed(0)

    data = f.readData()
    train, validation, test = f.splitData(data.shape[0])

    C = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1]

    # C = 0.061 F-1 = 0.525
    trainRegularizationStrengthForl2(data, train, validation, C)

    # C = 0.175 F-1 = 0.526170798898
    trainRegularizationStrengthForl1(data, train, validation, C)
def run_dp_parser(filenames, no_of_sentences, NO_OF_THREADS=5):

    os.system("mkdir -p " + filenames.output_folder + "/dp_data_pos")
    data = F.readData(filenames.corpus_name)
    print("Read Complete", len(data))

    def func(i):
        start = int(i * (len(data) / NO_OF_THREADS))
        end = int(min(((i + 1) * (len(data) / NO_OF_THREADS)), len(data)) - 1)
        print("Thread", i, start, end)
        F.genrate_triplets_full(i, data[start:end], filenames)

    t = [threading.Thread(target=func, args=(i,)) for i in range(NO_OF_THREADS)]
    for temp in t:
        temp.start()
    for temp in t:
        temp.join()
def main():
    np.random.seed(0)

    data = f.readData()
    train, validation, test = f.splitData(data.shape[0])

    # trainNeuralNetworks(data, train, validation)

    alphas = np.arange(0.0001, 0.0015, 0.0001)  # [0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01]
    N = [75, 100]
    F_1 = np.zeros([len(alphas), len(N)])
    for i in range(len(alphas)):
        for j in range(len(N)):
            F_1[i, j] = trainNeuralNetworks(data,
                                            train,
                                            validation,
                                            N=N[j],
                                            alpha=alphas[i])
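
A hedged follow-up to the grid search above (not in the source): assuming trainNeuralNetworks returns the validation F-1 score, the best hyper-parameter pair is simply the argmax cell of the score grid. best_cell below is a hypothetical helper.

import numpy as np

def best_cell(scores, alphas, N):
    # scores is a len(alphas) x len(N) grid like F_1 above.
    i, j = np.unravel_index(np.argmax(scores), scores.shape)
    return alphas[i], N[j], scores[i, j]
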
def main():
    np.random.seed(0)

    data = f.readData()
    train, validation, test = f.splitData(data.shape[0])

    # C = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1]

    # n_estimators = [5000, 10000, 50000, 100000, 500000]
    # n_estimators = np.arange(10, 200, 10)
    # n_estimators = np.repeat([100],100)

    # number_of_trees = 100, average F-1 on 100 forests = 0.377228139802
    # trainRandomForest(data, train, validation, n_estimators, max_features = None)

    n_estimators = [100, 200, 500, 1000]
    # number_of_boosting_stages = 100, average F-1 on 100 boostings = 0.377228139802
    trainGradientBoosting(data,
                          train,
                          validation,
                          n_estimators,
                          max_features='auto')
Example #13
def preprocessing(filenames):
    data = ""
    sentences = []
    words = []
    # if 's' not in F.sys.argv:
    # 	print("A")
    # Find Sentences and save to file
    data = F.readData(filenames.corpus_name)
    sentences = F.getSentences(data)
    F.save_to_file(filenames.sents_file_name, sentences,
                   filenames.output_folder)
    # else:
    # 	print("B")
    # 	sentences=F.load_to_file(filenames.sents_file_name)

    # if 'w' not in F.sys.argv:
    print("C")
    # Find Sentences and save to file
    words = F.getWords(sentences)
    F.save_to_file(filenames.words_file_name, words, filenames.output_folder)
    # else:
    # 	print("D")
    # 	words=F.load_to_file(filenames.words_file_name)

    updated_words, vocab = F.getVocabulary(words, 400)
    F.save_to_file(filenames.vocab_file, vocab, filenames.output_folder)
    F.save_to_file(filenames.updated_words_file_name, updated_words,
                   filenames.output_folder)

    word_to_index = {}
    index_to_word = {}
    for k, v in enumerate(vocab):
        word_to_index[v] = k
        index_to_word[k] = v
    F.save_to_file(filenames.w2i_file, word_to_index, filenames.output_folder)
    F.save_to_file(filenames.i2w_file, index_to_word, filenames.output_folder)

    print(len(sentences), len(words))
Example #14
def main():
    np.random.seed(0)

    data = f.readData()
    train, validation, test = f.splitData(data.shape[0])

    # C = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1]
    C = [0.000001, 0.000005, 0.00001, 0.00005, 0.0001, 0.0005]
    # C = [10, 50, 100, 500, 1000]
    # C = np.arange(1, 10, 1)

    # C = 137, F-1 = 0.541310541311
    trainSVMWithGaussianKernel(data, train, validation, C)

    trainSVMWithLinearKernel(data, train, validation, C)

    # SVM with Linear Kernel

    # l1, squared hinge, C = 50, F-1 = 0.525447042641
    # l2, hinge, C = 0.001 , F-1 = 0.512968299712
    # l2, squared hinge, C = 1, F-1 = 0.524725274725

    trainSVMWithLinearKernel2(data, train, validation, C)
Example #15
                        # for i in range(nodeNum + 1):
                        #    if (i != j):
                        #        lhs.addTerms(1, mu[i, j])
                        #        lhs.addTerms(-1, mu[j, i])
                        # model.cbLazy(lhs == 1)
                        model.cbLazy(
                            quicksum(mu[i, j] - mu[j, i]
                                     for i in range(nodeNum + 1)
                                     if i != j) == 1)


starttime = time.time()

nodeNum = 76
path = 'C:/academic/Graduate Paper/code/pr76.txt'
cost = readData(path, nodeNum)
printData(cost)

method = 'S'
num = 5
measure = 'number'

model = Model('TSP')

# create decision variables
X = {}
mu = {}
for i in range(nodeNum + 1):
    for j in range(nodeNum + 1):
        if (i != j):
            X[i, j] = model.addVar(vtype=GRB.BINARY,
          
def genLearning(data, cls):
    
    classes = list(set(cls))
    n = len(data) #length of data 
    predicted = []
    p = np.random.permutation(len(data))
    data = data[p]
    cls = cls[p]
    
    kf = KFold(n, n_folds = 10, shuffle = False) #Cross Validation
    for trainIndex, testIndex in kf:
        Mu, S = train(data[trainIndex],cls[trainIndex]) #train the data, calculate Mu and Sigma for train data
        predicted.append(classify(data[testIndex], cls[testIndex], classes, Mu, S)) #Classify
    predicted = list(itertools.chain(*predicted))
    f.evaluate(predicted,cls, classes)
    return (predicted,cls)

if __name__ == "__main__":     
    reload(f)  
    fileName = "iris.csv"
    data, cls = f.readData(fileName)
    
    data = data[:100] #uses first 2 classes, 50 of each
    cls = cls[:100]   #uses first 2 classes, 50 of each
    predicted, cls = genLearning(data, cls) #Train, predict & evaluate
    f.plot_matrix(cls, predicted)
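
The KFold call above uses the pre-0.18 scikit-learn interface (KFold(n, n_folds=10)); with scikit-learn 0.18 or later the equivalent loop is written against kf.split(...). A minimal sketch with dummy data:

from sklearn.model_selection import KFold
import numpy as np

X = np.arange(20).reshape(10, 2)           # dummy data, just to show the call
kf = KFold(n_splits=10, shuffle=False)
for trainIndex, testIndex in kf.split(X):  # replaces `for trainIndex, testIndex in kf:` above
    pass                                   # train/classify on X[trainIndex], X[testIndex] here
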
    
    
    
    
Example #17
import cc3d
import numpy as np
import time
import h5py
from numba import njit, types
from numba.errors import NumbaDeprecationWarning, NumbaPendingDeprecationWarning
import warnings
from numba.typed import Dict
import os
import pickle
import param
import sys

from functions import readData, writeData, compareOutp, concatBlocks

# STEP 4: visualize wholes
# print out total of found wholes
blocks_concat = concatBlocks(z_start=param.z_start, y_start=param.y_start, x_start=param.x_start, n_blocks_z=param.n_blocks_z, n_blocks_y=param.n_blocks_y, n_blocks_x=param.n_blocks_x,
                                bs_z=param.max_bs_z, bs_y=param.max_bs_y, bs_x=param.max_bs_x, output_path=param.folder_path)

filename = param.data_path+"/"+param.sample_name+"/"+param.sample_name
box = [1]
labels_inp = readData(box, filename)
neg = np.subtract(blocks_concat, labels_inp)
output_name = "wholes"
writeData(param.folder_path+output_name, neg)

compareOutp(output_path=param.data_path,sample_name=param.sample_name,ID_B=param.outp_ID )
sents_file_name = 'sents'
words_file_name = 'words'
updated_words_file_name = 'updated_words'
vocab_file = 'vocab'
w2i_file = 'word_to_index'
i2w_file = 'index_to_word'

corpus_name = '../Data/reviews.txt'

data = ""
sentences = []
words = []
if 's' not in F.sys.argv:
    print("A")
    data = F.readData(corpus_name)
    sentences = F.getSentences(data)
    F.save_to_file(sents_file_name, sentences)
else:
    print("B")
    sentences = F.load_to_file(sents_file_name)

if 'w' not in F.sys.argv:
    print("C")
    words = F.getWords(sentences)
    F.save_to_file(words_file_name, words)
else:
    print("D")
    words = F.load_to_file(words_file_name)

updated_words, vocab = F.getVocabulary(words, 400)
# # for i in r:
# #     print(i)
#
# # counter = 0
# # for i in range(20):
# #  counter = counter + 1
# #  if counter%5 == 0:
# #      print("Count is: " + str(counter))

# labels = readData([1],"/home/frtim/wiring/raw_data/segmentations/Zebrafinch/0000")
# print(labels.shape)
#
# labels = readData([1],"/home/frtim/wiring/raw_data/segmentations/Zebrafinch/0128")
# print(labels.shape)
#
labels = readData([1],
                  "/home/frtim/wiring/raw_data/segmentations/Zebrafinch/5632")
print(labels.shape)

#
# f = open("/home/frtim/Desktop/test.txt", "a+")
# f.write("1\n")
# f.close()
#
# f = open("/home/frtim/Desktop/test.txt", "a+")
# f.write("4\n")
# f.close()
#
# f = open("/home/frtim/Desktop/test.txt", "a+")
# f.write("3\n")
# f.close()
#

def normalize(rows):
    desired_max, desired_min = 1, -1
    X = np.array(rows)
    numerator = (X - X.min(axis=0))
    numerator *= (desired_max - desired_min)
    denom = X.max(axis=0) - X.min(axis=0)
    denom[denom == 0] = 1
    return (desired_min + numerator / denom).tolist()
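
A brief usage sketch for normalize (the rows are made up): every column is rescaled independently to the range [-1, 1], and a constant column is mapped to -1 instead of triggering a division by zero.

rows = [[1.0, 10.0, 5.0],
        [2.0, 20.0, 5.0],
        [3.0, 30.0, 5.0]]   # hypothetical input; the last column is constant
print(normalize(rows))      # [[-1.0, -1.0, -1.0], [0.0, 0.0, -1.0], [1.0, 1.0, -1.0]]
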


if __name__ == '__main__':
    sc = SparkContext()

    data = functions.readData(sc, "/cs455/project/data/chars")
    data = functions.filterByAddons(data)
    data = functions.filterByClass(data).persist()

    archers = data.filter(lambda char: char['character']['class'] == 'Archer'
                          ).map(archerToColumn)
    archerList = archers.collect()

    levelIndex = 2
    levelSets = [[i] for i in range(1, 16)
                 ] + [[i for i in range(5 * j + 16, 5 * j + 21)]
                      for j in range(7)]

    normedRows = []
    for levelSet in levelSets:
        levelRows = [
	print("Data splitting finished.")
	return train, test


def resampleTrain(train):
	np.random.seed()
	n = len(train)
	train = np.random.choice(train, math.floor(n*8/9) ,replace=True)
	return train



n = 1000
f1 = np.zeros(n)
clf = SVC(C = 137, class_weight = 'balanced')
data = f.readData()
train, test = splitData(data.shape[0])
for i in range(n):
	print(i+1)
	train_ = resampleTrain(train)

	X_train, y_train = v.makeMatrix(data, train_)
	X_test, y_test = v.makeMatrix(data, test)
	try:
		f1[i] = v.validate(data, X_train, y_train, X_test, y_test, clf)
	except Exception:
		f1[i] = 0



np.savetxt("f1_confidence.csv", f1, delimiter=",")
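
The 1000 bootstrap F-1 scores saved above can then be summarised as an empirical confidence interval; a hedged sketch (the 95% level and the filtering of failed runs are assumptions, not stated in the source):

import numpy as np

f1 = np.loadtxt("f1_confidence.csv", delimiter=",")
f1 = f1[f1 > 0]                                # drop runs recorded as 0 by the except branch above
lower, upper = np.percentile(f1, [2.5, 97.5])  # empirical 95% confidence interval
print("F-1 95%% CI: [%.3f, %.3f]" % (lower, upper))
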
def main():
    np.random.seed(0)

    data = f.readData()
    train, validation, test = f.splitData(data.shape[0])

    X_train, y_train = v.makeMatrix(data, train)
    X_test, y_test = v.makeMatrix(data, test)

    print("Logistic Regression")
    clf = LogisticRegression(C=0.061,
                             class_weight='balanced',
                             max_iter=10000,
                             solver='sag',
                             n_jobs=-1)
    f1 = v.validate(data, X_train, y_train, X_test, y_test, clf)
    print("F-1 measure for Logistic Regression with l2 and C = %s is %s" %
          (0.061, f1))

    clf = LogisticRegression(penalty='l1',
                             C=0.175,
                             class_weight='balanced',
                             max_iter=5000,
                             n_jobs=-1)
    f1 = v.validate(data, X_train, y_train, X_test, y_test, clf)
    print("F-1 measure for Logistic Regression with l1 and C = %s is %s" %
          (0.175, f1))

    print("SVM")
    clf = SVC(C=137, class_weight='balanced')
    f1 = v.validate(data, X_train, y_train, X_test, y_test, clf)
    print("F-1 measure for SVM with RBF and C = %s is %s" % (137, f1))

    # l1, squared hinge, C = 50, F-1 = 0.525447042641
    # l2, hinge, C = 0.001 , F-1 = 0.512968299712
    # l2, squared hinge, C = 1, F-1 = 0.524725274725
    C = 50
    loss = "squared_hinge"
    penalty = 'l1'
    clf = LinearSVC(C=C,
                    loss=loss,
                    penalty=penalty,
                    class_weight='balanced',
                    dual=False)
    f1 = v.validate(data, X_train, y_train, X_test, y_test, clf)
    print(
        "F-1 measure for SVM with Linear Kernel, %s loss, %s penalty, and C = %s is %s"
        % (loss, penalty, C, f1))

    C = 0.001
    loss = "hinge"
    penalty = 'l2'
    clf = LinearSVC(C=C,
                    loss=loss,
                    penalty=penalty,
                    class_weight='balanced',
                    dual=True)
    f1 = v.validate(data, X_train, y_train, X_test, y_test, clf)
    print(
        "F-1 measure for SVM with Linear Kernel, %s loss, %s penalty, and C = %s is %s"
        % (loss, penalty, C, f1))

    C = 1
    loss = "squared_hinge"
    penalty = 'l2'
    clf = LinearSVC(C=C,
                    loss=loss,
                    penalty=penalty,
                    class_weight='balanced',
                    dual=False)
    f1 = v.validate(data, X_train, y_train, X_test, y_test, clf)
    print(
        "F-1 measure for SVM with Linear Kernel, %s loss, %s penalty, and C = %s is %s"
        % (loss, penalty, C, f1))
Example #23
from pyspark import SparkContext
import functions as fs

if __name__ == '__main__':
    sc = SparkContext()
    chars = fs.filterByAddons(fs.readData(sc, "/cs455/project/data/chars"))
Example #24
bs_x = 2048

# make Zebrafinch blocks into chunks of size 512x2048x2048 (smaller if on edge)
from functions import readData, makeFolder, blockFolderPath, writeData
zrange = np.arange(0, 45)

sample_name = ""
folder_path = "/n/pfister_lab2/Lab/tfranzmeyer/Zebrafinch/"

for bz in zrange[::4]:

    z_block = int(bz / 4)
    print("bz is: " + str(bz), flush=True)

    filename = folder_path + "/" + sample_name + "/" + str(bz * 128).zfill(4)
    block_a = readData(box=[1], filename=filename)

    if bz != zrange[-1]:
        filename = folder_path + "/" + sample_name + "/" + str(
            (bz + 1) * 128).zfill(4)
        block_b = readData(box=[1], filename=filename)
        block_a = np.concatenate((block_a, block_b), axis=0)
        del block_b

        filename = folder_path + "/" + sample_name + "/" + str(
            (bz + 2) * 128).zfill(4)
        block_c = readData(box=[1], filename=filename)
        block_a = np.concatenate((block_a, block_c), axis=0)
        del block_c

        filename = folder_path + "/" + sample_name + "/" + str(
zrange = np.arange(param.z_start, param.z_start + param.n_blocks_z)

sample_name = "stacked_512"
folder_path = "/n/pfister_lab2/Lab/tfranzmeyer/Zebrafinch/"
outp_ID = "allBlocksProcessed"

for z_block in range(12):
    print("Z is " + str(z_block), flush=True)

    for y_block in range(3):
        print("Y is " + str(y_block), flush=True)

        filename = folder_path + "/" + sample_name + "/" + outp_ID + "/z" + str(
            z_block).zfill(4) + "y" + str(y_block).zfill(4) + "x" + str(
                0).zfill(4) + "/" + str(z_block).zfill(4)
        block_0 = readData(box=[1], filename=filename)
        print(block_0.shape)

        filename = folder_path + "/" + sample_name + "/" + outp_ID + "/z" + str(
            z_block).zfill(4) + "y" + str(y_block).zfill(4) + "x" + str(
                1).zfill(4) + "/" + str(z_block).zfill(4)
        block_1 = readData(box=[1], filename=filename)
        print(block_1.shape)

        filename = folder_path + "/" + sample_name + "/" + outp_ID + "/z" + str(
            z_block).zfill(4) + "y" + str(y_block).zfill(4) + "x" + str(
                2).zfill(4) + "/" + str(z_block).zfill(4)
        block_2 = readData(box=[1], filename=filename)
        print(block_2.shape)

        x_data = np.concatenate((block_0, block_1, block_2), axis=2)