def Writing():
    data = readData()

    def getDataFromUser():
        print("Add student")
        name = input("name : ")
        surname = input("surname : ")
        age = int(input("age : "))
        username = input("username : ")
        password = input("password : ")  # masked in the original dump; reconstructed from the fields used below
        students = {
            "name": name,
            "surname": surname,
            "age": age,
            "username": username,
            "password": password
        }
        data['student'].append(students)

    num = int(input("Enter the number of students to add : "))
    for i in range(num):
        getDataFromUser()
    with open("database.json", "w") as conn:
        json.dump(data, conn)
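# NOTE: the student-database helpers in this dump (Writing, deleting, updating,
# reading) all rely on a readData() function that is not shown. A minimal
# sketch, assuming it just loads database.json into a dict with "student" and
# "teachers" lists (the original implementation may differ):
import json

def readData():
    # Assumed helper, not the original code: load the whole JSON database.
    with open("database.json") as conn:
        return json.load(conn)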
def trainModel():
    train_data = f.readData("recommend_train_1hot_coding.csv")
    X = train_data[:, 0:-1]   # all columns except the last are features
    y = train_data[:, -1]     # last column is the label
    clf = SVC(C=137, class_weight='balanced', probability=True)
    clf.fit(X, y)
    return clf
def run_dp_parser(filenames, no_of_sentences, NO_OF_THREADS=5):
    print("Swapnil")
    # sentences = F.load_to_file(filenames.sents_file_name, filenames.output_folder)
    # triplets_dict = {}
    # F.count = 0
    # path_to_jar = '/home/cs17mtech11004/stanford-parser-full-2018-02-27/stanford-parser.jar'
    # path_to_models_jar = '/home/cs17mtech11004/stanford-parser-full-2018-02-27/stanford-parser-3.9.1-models.jar'
    # dependency_parser = StanfordDependencyParser(path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
    # dependency_parser = ""
    # no_of_sent = len(sentences)
    # if no_of_sentences > 0:
    #     no_of_sent = no_of_sentences
    os.system("mkdir -p " + filenames.output_folder + "/dp_data_pos")
    data = F.readData(filenames.corpus_name)
    print("Read Complete", len(data))

    def func(i):
        # Split the corpus into NO_OF_THREADS contiguous chunks; the end index is
        # exclusive (the original subtracted 1 here and dropped one item per chunk).
        start = int(i * (len(data) / NO_OF_THREADS))
        end = int(min((i + 1) * (len(data) / NO_OF_THREADS), len(data)))
        print("Thread", i, start, end)
        F.genrate_triplets_full(i, data[start:end], filenames)

    t = [threading.Thread(target=func, args=(i, )) for i in range(NO_OF_THREADS)]
    for temp in t:
        temp.start()
    for temp in t:
        temp.join()
def deleting():
    data = readData()
    oldpass = input("write password : ")
    # The matching/removal logic was masked in the original dump; a plausible
    # reconstruction is to drop the student whose password matches.
    data['student'] = [s for s in data['student'] if s['password'] != oldpass]
    print("Data deleted...")
    with open("database.json", "w") as conn:
        json.dump(data, conn)
def updating():
    data = readData()
    oldpass = input("enter old password : ")
    newpass = input("new password : ")
    # The update logic was masked in the original dump; a plausible
    # reconstruction is to replace the password of the matching student.
    for item in data['student']:
        if item['password'] == oldpass:
            item['password'] = newpass
    print("Data updated...")
    with open("database.json", "w") as conn:
        json.dump(data, conn)
def main():
    model = trainModel()
    cur = os.getcwd()
    files = os.listdir(cur + '/recommend')
    for doc in files:
        test_data = f.readData("recommend/" + doc)
        test_data = test_data[:, 0:-1]          # drop the label column
        test_score = model.predict_log_proba(test_data)
        np.savetxt(doc[:-4] + "_score.csv", test_score, delimiter=",")
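# NOTE: trainModel() and main() above assume an f.readData(path) helper that
# returns a numeric NumPy array with features in the leading columns and the
# label in the last column. A minimal sketch under that assumption (the
# np.loadtxt call is a guess, not the original implementation):
import numpy as np

def readData(path):
    # Assumed helper: load a one-hot-encoded CSV into a 2-D float array.
    return np.loadtxt(path, delimiter=",")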
def preprocessing(filenames):
    data = ""
    sentences = []
    words = []

    # Find sentences and save to file (reuse a cached file if present)
    data = F.readData(filenames.corpus_name)
    import os
    if not os.path.isfile(filenames.output_folder + '/' + filenames.sents_file_name):
        sentences = F.getSentences(data)
        F.save_to_file(filenames.sents_file_name, sentences, filenames.output_folder)
    else:
        print("Sentences File Found")
        sentences = F.load_to_file(filenames.sents_file_name, filenames.output_folder)

    # Find words and save to file (reuse a cached file if present)
    if not os.path.isfile(filenames.output_folder + '/' + filenames.words_file_name):
        words = F.getWords(sentences)
        F.save_to_file(filenames.words_file_name, words, filenames.output_folder)
    else:
        print("Words File Found")
        words = F.load_to_file(filenames.words_file_name, filenames.output_folder)

    print("Length of text data: ", len(data))

    # Vocabulary cutoffs tried in earlier runs:
    # 400, 300, 200, 100, 75, 50, 25, 20, 15, 5, 4, 3, 2, 1, 0; 10 is currently active.
    updated_words, vocab = F.getVocabulary(words, 10, filenames)
    F.save_to_file(filenames.vocab_file, vocab, filenames.output_folder)
    F.save_to_file(filenames.updated_words_file_name, updated_words, filenames.output_folder)

    word_to_index = {}
    index_to_word = {}
    for k, v in enumerate(vocab):
        word_to_index[v] = k
        index_to_word[k] = v
    F.save_to_file(filenames.w2i_file, word_to_index, filenames.output_folder)
    F.save_to_file(filenames.i2w_file, index_to_word, filenames.output_folder)
    print(len(sentences), len(words))
def reading():
    data = readData()
    print("""Student information -> 1
Teachers information -> 2""")
    info = int(input("press 1 or 2 : "))
    if info == 1:
        for item in data['student']:
            print(
                f"Name : {item['name']} \nSurname : {item['surname']} \n"
                f"Age : {item['age']} \nUsername : {item['username']} \nPassword : {item['password']} \n"
            )
    elif info == 2:
        for item in data['teachers']:
            print(
                f"Name : {item['name']} \nSurname : {item['surname']} \n"
                f"Username : {item['username']} \nPassword : {item['password']} \n"
            )
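# NOTE: illustrative only -- a minimal menu loop wiring the four helpers above
# together; the original driver code for these functions is not shown here.
if __name__ == "__main__":
    actions = {"1": Writing, "2": reading, "3": updating, "4": deleting}
    while True:
        choice = input("1) add  2) read  3) update  4) delete  q) quit : ")
        if choice == "q":
            break
        if choice in actions:
            actions[choice]()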
def main():
    np.random.seed(0)
    data = f.readData()
    train, validation, test = f.splitData(data.shape[0])
    C = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1]
    # Best found: C = 0.061, F-1 = 0.525
    trainRegularizationStrengthForl2(data, train, validation, C)
    # Best found: C = 0.175, F-1 = 0.526170798898
    trainRegularizationStrengthForl1(data, train, validation, C)
def run_dp_parser(filenames, no_of_sentences, NO_OF_THREADS=5):
    os.system("mkdir -p " + filenames.output_folder + "/dp_data_pos")
    data = F.readData(filenames.corpus_name)
    print("Read Complete", len(data))

    def func(i):
        # Split the corpus into NO_OF_THREADS contiguous chunks; the end index is
        # exclusive (the original subtracted 1 here and dropped one item per chunk).
        start = int(i * (len(data) / NO_OF_THREADS))
        end = int(min((i + 1) * (len(data) / NO_OF_THREADS), len(data)))
        print("Thread", i, start, end)
        F.genrate_triplets_full(i, data[start:end], filenames)

    t = [threading.Thread(target=func, args=(i,)) for i in range(NO_OF_THREADS)]
    for temp in t:
        temp.start()
    for temp in t:
        temp.join()
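# NOTE: the chunk arithmetic above is easy to get wrong; the original version
# subtracted 1 from the exclusive slice end and silently dropped one item per
# chunk. A quick sanity check of the corrected boundaries (103 items and 5
# threads are arbitrary example values, not taken from the original code):
data_len, threads = 103, 5
for i in range(threads):
    start = int(i * (data_len / threads))
    end = int(min((i + 1) * (data_len / threads), data_len))
    print(i, start, end)
# prints 0 0 20 / 1 20 41 / 2 41 61 / 3 61 82 / 4 82 103 -- full coverage, no overlap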
def main():
    np.random.seed(0)
    data = f.readData()
    train, validation, test = f.splitData(data.shape[0])
    # trainNeuralNetworks(data, train, validation)
    alphas = np.arange(0.0001, 0.0015, 0.0001)
    # previously tried: [0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01]
    N = [75, 100]
    F_1 = np.zeros([len(alphas), len(N)])
    for i in range(len(alphas)):
        for j in range(len(N)):
            F_1[i, j] = trainNeuralNetworks(data, train, validation, N=N[j], alpha=alphas[i])
def main():
    np.random.seed(0)
    data = f.readData()
    train, validation, test = f.splitData(data.shape[0])
    # C = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1]
    # n_estimators = [5000, 10000, 50000, 100000, 500000]
    # n_estimators = np.arange(10, 200, 10)
    # n_estimators = np.repeat([100], 100)
    # number_of_trees = 100, average F-1 over 100 forests = 0.377228139802
    # trainRandomForest(data, train, validation, n_estimators, max_features=None)
    n_estimators = [100, 200, 500, 1000]
    # number_of_boosting_stages = 100, average F-1 over 100 boostings = 0.377228139802
    trainGradientBoosting(data, train, validation, n_estimators, max_features='auto')
def preprocessing(filenames):
    data = ""
    sentences = []
    words = []

    # if 's' not in F.sys.argv:
    #     print("A")
    # Find sentences and save to file
    data = F.readData(filenames.corpus_name)
    sentences = F.getSentences(data)
    F.save_to_file(filenames.sents_file_name, sentences, filenames.output_folder)
    # else:
    #     print("B")
    #     sentences = F.load_to_file(filenames.sents_file_name)

    # if 'w' not in F.sys.argv:
    print("C")
    # Find words and save to file
    words = F.getWords(sentences)
    F.save_to_file(filenames.words_file_name, words, filenames.output_folder)
    # else:
    #     print("D")
    #     words = F.load_to_file(filenames.words_file_name)

    updated_words, vocab = F.getVocabulary(words, 400)
    F.save_to_file(filenames.vocab_file, vocab, filenames.output_folder)
    F.save_to_file(filenames.updated_words_file_name, updated_words, filenames.output_folder)

    word_to_index = {}
    index_to_word = {}
    for k, v in enumerate(vocab):
        word_to_index[v] = k
        index_to_word[k] = v
    F.save_to_file(filenames.w2i_file, word_to_index, filenames.output_folder)
    F.save_to_file(filenames.i2w_file, index_to_word, filenames.output_folder)
    print(len(sentences), len(words))
def main():
    np.random.seed(0)
    data = f.readData()
    train, validation, test = f.splitData(data.shape[0])
    # C = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1]
    C = [0.000001, 0.000005, 0.00001, 0.00005, 0.0001, 0.0005]
    # C = [10, 50, 100, 500, 1000]
    # C = np.arange(1, 10, 1)
    # Best found for the Gaussian kernel: C = 137, F-1 = 0.541310541311
    trainSVMWithGaussianKernel(data, train, validation, C)
    trainSVMWithLinearKernel(data, train, validation, C)
    # SVM with linear kernel, best settings found:
    # l1, squared hinge, C = 50,    F-1 = 0.525447042641
    # l2, hinge,         C = 0.001, F-1 = 0.512968299712
    # l2, squared hinge, C = 1,     F-1 = 0.524725274725
    trainSVMWithLinearKernel2(data, train, validation, C)
# for i in range(nodeNum + 1):
#     if i != j:
#         lhs.addTerms(1, mu[i, j])
#         lhs.addTerms(-1, mu[j, i])
# model.cbLazy(lhs == 1)
model.cbLazy(
    quicksum(mu[i, j] - mu[j, i] for i in range(nodeNum + 1) if i != j) == 1)

starttime = time.time()
nodeNum = 76
path = 'C:/academic/Graduate Paper/code/pr76.txt'
cost = readData(path, nodeNum)
printData(cost)
method = 'S'
num = 5
measure = 'number'
model = Model('TSP')

# create decision variables
X = {}
mu = {}
for i in range(nodeNum + 1):
    for j in range(nodeNum + 1):
        if i != j:
            X[i, j] = model.addVar(vtype=GRB.BINARY,
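# NOTE: the model.cbLazy(...) call above is only valid inside a Gurobi callback,
# and the snippet is truncated before that wiring appears. A hedged sketch of
# the usual registration pattern (not the original code; the separation logic
# depends on the formulation above):
from gurobipy import GRB

def callback(model, where):
    if where == GRB.Callback.MIPSOL:
        x_vals = model.cbGetSolution(model._X)  # incumbent values of the X variables
        # ... detect a violated constraint and add it lazily ...
        # model.cbLazy(expr <= rhs)

model._X = X
model.Params.LazyConstraints = 1
model.optimize(callback)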
def genLearning(data, cls):
    classes = list(set(cls))
    n = len(data)                            # length of data
    predicted = []
    p = np.random.permutation(len(data))     # shuffle data and labels together
    data = data[p]
    cls = cls[p]
    kf = KFold(n, n_folds=10, shuffle=False)  # cross validation (pre-0.18 scikit-learn API)
    for trainIndex, testIndex in kf:
        Mu, S = train(data[trainIndex], cls[trainIndex])  # estimate Mu and Sigma on the training fold
        predicted.append(classify(data[testIndex], cls[testIndex], classes, Mu, S))  # classify the held-out fold
    predicted = list(itertools.chain(*predicted))
    f.evaluate(predicted, cls, classes)
    return (predicted, cls)


if __name__ == "__main__":
    reload(f)
    fileName = "iris.csv"
    data, cls = f.readData(fileName)
    data = data[:100]   # use only the first 2 classes, 50 samples each
    cls = cls[:100]
    predicted, cls = genLearning(data, cls)  # train, predict & evaluate
    f.plot_matrix(cls, predicted)
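# NOTE: KFold(n, n_folds=10, ...) is the pre-0.18 sklearn.cross_validation API,
# removed in scikit-learn 0.20. On current versions the equivalent loop (a
# sketch, keeping the rest of genLearning unchanged) would be:
from sklearn.model_selection import KFold

kf = KFold(n_splits=10, shuffle=False)
for trainIndex, testIndex in kf.split(data):
    Mu, S = train(data[trainIndex], cls[trainIndex])
    predicted.append(classify(data[testIndex], cls[testIndex], classes, Mu, S))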
import cc3d
import numpy as np
import time
import h5py
from numba import njit, types
from numba.errors import NumbaDeprecationWarning, NumbaPendingDeprecationWarning
import warnings
from numba.typed import Dict
import os
import pickle
import param
import sys
from functions import readData, writeData, compareOutp, concatBlocks

# STEP 4: visualize wholes and print out the total of found wholes
blocks_concat = concatBlocks(z_start=param.z_start,
                             y_start=param.y_start,
                             x_start=param.x_start,
                             n_blocks_z=param.n_blocks_z,
                             n_blocks_y=param.n_blocks_y,
                             n_blocks_x=param.n_blocks_x,
                             bs_z=param.max_bs_z,
                             bs_y=param.max_bs_y,
                             bs_x=param.max_bs_x,
                             output_path=param.folder_path)

filename = param.data_path + "/" + param.sample_name + "/" + param.sample_name
box = [1]
labels_inp = readData(box, filename)
neg = np.subtract(blocks_concat, labels_inp)

output_name = "wholes"
writeData(param.folder_path + output_name, neg)
compareOutp(output_path=param.data_path, sample_name=param.sample_name, ID_B=param.outp_ID)
sents_file_name = 'sents'
words_file_name = 'words'
updated_words_file_name = 'updated_words'
vocab_file = 'vocab'
w2i_file = 'word_to_index'
i2w_file = 'index_to_word'
corpus_name = '../Data/reviews.txt'

data = ""
sentences = []
words = []

if 's' not in F.sys.argv:
    print("A")
    data = F.readData(corpus_name)
    sentences = F.getSentences(data)
    F.save_to_file(sents_file_name, sentences)
else:
    print("B")
    sentences = F.load_to_file(sents_file_name)

if 'w' not in F.sys.argv:
    print("C")
    words = F.getWords(sentences)
    F.save_to_file(words_file_name, words)
else:
    print("D")
    words = F.load_to_file(words_file_name)

updated_words, vocab = F.getVocabulary(words, 400)
# # for i in r:
# #     print(i)
#
# # counter = 0
# # for i in range(20):
# #     counter = counter + 1
# #     if counter % 5 == 0:
# #         print("Count is: " + str(counter))

# labels = readData([1], "/home/frtim/wiring/raw_data/segmentations/Zebrafinch/0000")
# print(labels.shape)
#
# labels = readData([1], "/home/frtim/wiring/raw_data/segmentations/Zebrafinch/0128")
# print(labels.shape)

labels = readData([1], "/home/frtim/wiring/raw_data/segmentations/Zebrafinch/5632")
print(labels.shape)

# # f = open("/home/frtim/Desktop/test.txt", "a+")
# # f.write("1\n")
# # f.close()
#
# # f = open("/home/frtim/Desktop/test.txt", "a+")
# # f.write("4\n")
# # f.close()
#
# # f = open("/home/frtim/Desktop/test.txt", "a+")
# # f.write("3\n")
# # f.close()
#
def normalize(rows):
    # Rescale each column linearly to the range [-1, 1].
    desired_max, desired_min = 1, -1
    X = np.array(rows)
    numerator = (X - X.min(axis=0))
    numerator *= (desired_max - desired_min)
    denom = X.max(axis=0) - X.min(axis=0)
    denom[denom == 0] = 1   # avoid division by zero for constant columns
    return (desired_min + numerator / denom).tolist()


if __name__ == '__main__':
    sc = SparkContext()
    data = functions.readData(sc, "/cs455/project/data/chars")
    data = functions.filterByAddons(data)
    data = functions.filterByClass(data).persist()
    archers = data.filter(
        lambda char: char['character']['class'] == 'Archer').map(archerToColumn)
    archerList = archers.collect()
    levelIndex = 2
    levelSets = [[i] for i in range(1, 16)] + \
        [[i for i in range(5 * j + 16, 5 * j + 21)] for j in range(7)]
    normedRows = []
    for levelSet in levelSets:
        levelRows = [
print("Data splitting finished.") return train, test def resampleTrain(train): np.random.seed() n = len(train) train = np.random.choice(train, math.floor(n*8/9) ,replace=True) return train n = 1000 f1 = np.zeros(n) clf = SVC(C = 137, class_weight = 'balanced') data = f.readData() train, test = splitData(data.shape[0]) for i in range(n): print(i+1) train_ = resampleTrain(train) X_train, y_train = v.makeMatrix(data, train_) X_test, y_test = v.makeMatrix(data, test) try: f1[i] = v.validate(data, X_train, y_train, X_test, y_test, clf) except Exception: f1[i] = 0 np.savetxt("f1_confidence.csv", f1, delimiter=",")
def main():
    np.random.seed(0)
    data = f.readData()
    train, validation, test = f.splitData(data.shape[0])
    X_train, y_train = v.makeMatrix(data, train)
    X_test, y_test = v.makeMatrix(data, test)

    print("Logistic Regression")
    clf = LogisticRegression(C=0.061,
                             class_weight='balanced',
                             max_iter=10000,
                             solver='sag',
                             n_jobs=-1)
    f1 = v.validate(data, X_train, y_train, X_test, y_test, clf)
    print("F-1 measure for Logistic Regression with l2 and C = %s is %s" % (0.061, f1))

    clf = LogisticRegression(penalty='l1',
                             C=0.175,
                             class_weight='balanced',
                             max_iter=5000,
                             n_jobs=-1)
    f1 = v.validate(data, X_train, y_train, X_test, y_test, clf)
    print("F-1 measure for Logistic Regression with l1 and C = %s is %s" % (0.175, f1))

    print("SVM")
    clf = SVC(C=137, class_weight='balanced')
    f1 = v.validate(data, X_train, y_train, X_test, y_test, clf)
    print("F-1 measure for SVM with RBF and C = %s is %s" % (137, f1))

    # Best linear-kernel settings found during validation:
    # l1, squared hinge, C = 50,    F-1 = 0.525447042641
    # l2, hinge,         C = 0.001, F-1 = 0.512968299712
    # l2, squared hinge, C = 1,     F-1 = 0.524725274725
    C = 50
    loss = "squared_hinge"
    penalty = 'l1'
    clf = LinearSVC(C=C, loss=loss, penalty=penalty, class_weight='balanced', dual=False)
    f1 = v.validate(data, X_train, y_train, X_test, y_test, clf)
    print("F-1 measure for SVM with Linear Kernel, %s loss, %s penalty, and C = %s is %s"
          % (loss, penalty, C, f1))

    C = 0.001
    loss = "hinge"
    penalty = 'l2'
    clf = LinearSVC(C=C, loss=loss, penalty=penalty, class_weight='balanced', dual=True)
    f1 = v.validate(data, X_train, y_train, X_test, y_test, clf)
    print("F-1 measure for SVM with Linear Kernel, %s loss, %s penalty, and C = %s is %s"
          % (loss, penalty, C, f1))

    C = 1
    loss = "squared_hinge"
    penalty = 'l2'
    clf = LinearSVC(C=C, loss=loss, penalty=penalty, class_weight='balanced', dual=False)
    f1 = v.validate(data, X_train, y_train, X_test, y_test, clf)
    print("F-1 measure for SVM with Linear Kernel, %s loss, %s penalty, and C = %s is %s"
          % (loss, penalty, C, f1))
from pyspark import SparkContext

import functions as fs

if __name__ == '__main__':
    sc = SparkContext()
    chars = fs.filterByAddons(fs.readData(sc, "/cs455/project/data/chars"))
bs_x = 2048  # make Zebrafinch blocks into chunks of size 512x2048x2048 (smaller if on edge)

from functions import readData, makeFolder, blockFolderPath, writeData

zrange = np.arange(0, 45)
sample_name = ""
folder_path = "/n/pfister_lab2/Lab/tfranzmeyer/Zebrafinch/"

for bz in zrange[::4]:
    z_block = int(bz / 4)
    print("bz is: " + str(bz), flush=True)

    filename = folder_path + "/" + sample_name + "/" + str(bz * 128).zfill(4)
    block_a = readData(box=[1], filename=filename)

    if bz != zrange[-1]:
        filename = folder_path + "/" + sample_name + "/" + str((bz + 1) * 128).zfill(4)
        block_b = readData(box=[1], filename=filename)
        block_a = np.concatenate((block_a, block_b), axis=0)
        del block_b

        filename = folder_path + "/" + sample_name + "/" + str((bz + 2) * 128).zfill(4)
        block_c = readData(box=[1], filename=filename)
        block_a = np.concatenate((block_a, block_c), axis=0)
        del block_c

        filename = folder_path + "/" + sample_name + "/" + str(
zrange = np.arange(param.z_start, param.z_start + param.n_blocks_z)
sample_name = "stacked_512"
folder_path = "/n/pfister_lab2/Lab/tfranzmeyer/Zebrafinch/"
outp_ID = "allBlocksProcessed"

for z_block in range(12):
    print("Z is " + str(z_block), flush=True)
    for y_block in range(3):
        print("Y is " + str(y_block), flush=True)

        filename = folder_path + "/" + sample_name + "/" + outp_ID + "/z" + str(
            z_block).zfill(4) + "y" + str(y_block).zfill(4) + "x" + str(
                0).zfill(4) + "/" + str(z_block).zfill(4)
        block_0 = readData(box=[1], filename=filename)
        print(block_0.shape)

        filename = folder_path + "/" + sample_name + "/" + outp_ID + "/z" + str(
            z_block).zfill(4) + "y" + str(y_block).zfill(4) + "x" + str(
                1).zfill(4) + "/" + str(z_block).zfill(4)
        block_1 = readData(box=[1], filename=filename)
        print(block_1.shape)

        filename = folder_path + "/" + sample_name + "/" + outp_ID + "/z" + str(
            z_block).zfill(4) + "y" + str(y_block).zfill(4) + "x" + str(
                2).zfill(4) + "/" + str(z_block).zfill(4)
        block_2 = readData(box=[1], filename=filename)
        print(block_2.shape)

        # stitch the three x-blocks back together along the x axis
        x_data = np.concatenate((block_0, block_1, block_2), axis=2)