def load(args, client):
    if args["input"] is not None:
        load_file(client, args["input"], typecast=args["typecast"],
                  cast=args["cast"], verbose=args["verbose"])
    else:
        load_folder(client, args["folder"], typecast=args["typecast"],
                    cast=args["cast"], verbose=args["verbose"])
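# Hypothetical invocation sketch of the dispatcher above: the dict keys
# mirror the flags it reads; `client` and the file name are placeholders,
# not taken from the source.
args = {"input": "records.csv", "folder": None,
        "typecast": True, "cast": False, "verbose": True}
load(args, client)  # with "input" set to None it would route to load_folder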
# Assumed third-party imports for this snippet (load_file is a project helper):
from elasticsearch import Elasticsearch, helpers
from tqdm import tqdm


def import_topic_data():
    es = Elasticsearch()
    df = load_file('topics_desc.pkl')
    # split the (word, weight) pairs into two columns
    df['wagi'] = [[w[1] for w in words] for words in df.words]
    df['words'] = [[w[0] for w in words] for words in df.words]
    # Polish column names: 'wagi' = weights, 'słowa' = words, 'opis' = description
    df.rename(columns={"words": "słowa", "desc": "opis"}, inplace=True)

    def to_doc(index, row):
        return {"_index": 'tematy', "_id": index, "_source": row.to_dict()}

    docs_gen = (to_doc(index, row) for index, row in df.iterrows())
    tematy_settings = {
        "mappings": {
            "properties": {
                "embedding": {
                    "type": "dense_vector",
                    "dims": 768
                }
            }
        }
    }
    # ignore=400 tolerates "index already exists" errors
    es.indices.create(index='tematy', ignore=400, body=tematy_settings)
    helpers.bulk(es, tqdm(docs_gen, total=len(df)))
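# A quick sanity-check sketch after the bulk load (elasticsearch-py 7.x
# style; index and field names follow the function above, the client is a
# fresh default connection):
es = Elasticsearch()
res = es.search(index='tematy', body={"query": {"match_all": {}}, "size": 1})
print(res['hits']['hits'][0]['_source']['słowa'])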
def last_two():
    P = data.load_file()
    params = {
        "epochs": 4000,
        "neurons": 1024,
        "learn_method": 'classic'
    }
    # generate a random symmetric weight matrix with a zeroed diagonal
    W = np.random.randn(params['neurons'], params['neurons'])
    W = (W + W.T) / 2
    W = W - np.diag(W.diagonal())
    # randint(-1, 1) draws from {-1, 0}; *2 + 1 maps that to bipolar {-1, 1}
    p = np.random.randint(-1, 1, params['neurons']).reshape(1, -1)
    p = (p * 2) + 1
    Hop = HopfieldNet(p)
    Hop.W = W
    recalled_set, energy = Hop.sequential_recall_shuffle(p, epochs=4000)
    plt.imshow(recalled_set.reshape(32, 32))
    plt.show()
    plt.plot(range(len(energy[0])), energy[0])
    plt.xlabel('Epoch', fontsize=16)
    plt.ylabel('Energy', fontsize=16)
    plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    plt.savefig('Energy_sym.png')
    plt.show()
def first():
    P = data.load_file()
    params = {"epochs": 4000, "neurons": 1024, "learn_method": 'classic'}
    # ====== train ========================================
    p0 = P[0].ravel()
    p1 = P[1].ravel()
    p2 = P[2].ravel()
    train_set = np.vstack((p0, p1, p2))
    recall_set = np.vstack((p0, p1, p2))
    # ====== add one more =================================
    p3 = P[3].ravel()
    p4 = P[4].ravel()
    p5 = P[5].ravel()
    p6 = P[6].ravel()
    add_p = {0: p3, 1: p4, 2: p5, 3: p6}
    recalled_set = {}
    # baseline: train on the first three patterns and recall them
    Hop = HopfieldNet(train_set)
    Hop.batch_train()
    recalled_set[0], energy = Hop.sequential_recall_shuffle(recall_set, epochs=2)
    # retrain from scratch each time one more pattern is stored
    for i in add_p.keys():
        print(i)
        train_set = np.vstack((train_set, add_p[i]))
        Hop = HopfieldNet(train_set)
        Hop.batch_train()
        recall_set = np.vstack((recall_set, add_p[i]))
        recalled_set[i + 1], energy = Hop.sequential_recall_shuffle(recall_set, epochs=2)
    # mean deviation of each recalled pattern from its original
    error_pattern = {0: [], 1: [], 2: []}
    for i in recalled_set.keys():
        error_pattern[0] += [abs(np.mean(recalled_set[i][0, :] - p0))]
        error_pattern[1] += [abs(np.mean(recalled_set[i][1, :] - p1))]
        error_pattern[2] += [abs(np.mean(recalled_set[i][2, :] - p2))]
        fig, ax = plt.subplots(1, 3)
        ax[0].imshow(recalled_set[i][0, :].reshape(32, 32), origin="lower")
        ax[1].imshow(recalled_set[i][1, :].reshape(32, 32), origin="lower")
        ax[2].imshow(recalled_set[i][2, :].reshape(32, 32), origin="lower")
        plt.show()
    plt.plot(range(len(error_pattern[0])), np.array(error_pattern[0]))
    plt.plot(range(len(error_pattern[1])), np.array(error_pattern[1]))
    plt.plot(range(len(error_pattern[2])), np.array(error_pattern[2]))
    plt.xlabel('Number of patterns added', fontsize=16)
    plt.ylabel('Error %', fontsize=16)
    plt.show()
def test():
    # The line below tells you where to put .theanorc for configuration:
    # print os.path.expanduser('~/.theanorc.txt')
    params = serial.load('../networks/1457038649_26229_9000_NET0_PARAMS')
    net = net_configs.createNet0(epoch=0)
    net.load_params_from(params)
    img = load_file('../data/diabetic_ret/dataset_256_norm/test/30307_left.jpeg')
    prediction = net.predict(img)
    print prediction
def load_mtl(name):
    file = data.load_file('models/' + name)
    mtl = {}
    name = None
    for line in file:
        line = line.strip()
        if not line or line[0] == '#':
            continue
        tokens = line.split()
        cmd = tokens.pop(0)
        if cmd == 'newmtl':
            name = tokens[0]
        elif cmd == 'Kd':
            # diffuse colour: scale 0.0-1.0 floats up to 0-255 ints
            mtl[name] = tuple(int(float(x) * 255) for x in tokens)
    return mtl
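# Illustrative input/output for load_mtl (file content invented for the
# example): a 'models/cube.mtl' containing
#     newmtl red
#     Kd 1.0 0.0 0.0
# would be parsed into {'red': (255, 0, 0)}.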
def load_obj_data(name):
    # if name in _obj_cache:
    #     return _obj_cache[name]
    file = data.load_file('models/' + name)
    vertices = []
    faces = []
    mtl = {}
    rot = rx(90) * 1  # reflect_x
    color = (127, 127, 127)
    for line in file:
        line = line.strip()
        if not line or line[0] == '#':
            continue
        tokens = line.split()
        cmd = tokens.pop(0)
        if cmd == 'v':
            vertices.append(rot(v3(float(x) for x in tokens)))
        elif cmd == 'f':
            # keep only the vertex index of each v/vt/vn token
            faces.append((tuple(int(x.split('/')[0]) for x in tokens), color))
        elif cmd == 'mtllib':
            mtl = load_mtl(tokens[0])
        elif cmd == 'usemtl':
            color = mtl[tokens[0]]
    xs, ys, zs = zip(*vertices)
    bbox = Box(min(xs), max(xs), min(ys), max(ys), min(zs), max(zs))
    vdata = []
    cdata = []
    # fan-triangulate each polygon from its first vertex
    for face, color in faces:
        vxs = [vertices[n - 1] for n in face]
        for i in xrange(len(vxs) - 2):
            vdata.extend(vxs[0])
            cdata.extend(color)
            vdata.extend(vxs[i + 2])
            cdata.extend(color)
            vdata.extend(vxs[i + 1])
            cdata.extend(color)
    # if DEBUG:
    #     print name, len(vdata) // 9
    return vdata, cdata, bbox
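# Sketch of how the fan triangulation above behaves on a hypothetical quad
# face (file content invented for the example):
#     f 1 2 3 4
# gives vxs = [v1, v2, v3, v4]; the xrange(len(vxs) - 2) loop then emits the
# triangles (v1, v3, v2) and (v1, v4, v3), where the i+2 / i+1 order flips
# the winding of each triangle.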
        if dmg < 0:
            dmg = 0
        print "%s landed a hit! Dealing %i damage." % (dict1['name'], dmg)
        dict2['hp'] -= dmg
    else:
        print "%s missed!" % (dict1['name'])
    if dict2['hp'] <= 0:
        alive = 2
    return alive


ans = raw_input('New player? ')
if ans in ('yes', 'Yes', 'y', '1'):
    filename = raw_input('Name your Character: ')
    data.new_file(filename)
else:
    filename = raw_input('What is your Character Name? ')
player = data.load_file(filename)

ans = raw_input('Wanna use health potion? ')
if ans in ('yes', 'Yes', 'y', '1'):
    player['hp'] += 5
    if player['hp'] > player['life']:
        player['hp'] = player['life']  # cap at max life

id = raw_input('Enter beast code: ')
monster = beastiary.beast(id)
print "You will fight a %s" % (monster['name'])
raw_input("\nPress enter to continue.")
    'epochs_all': epochs_all,
    'iterations_all': iterations_all,
    'batch_size_all': batch_size_all,
    'learning_rate_all': learning_rate_all,
}

save_path = 'models/{}/'.format(datetime.now().strftime("%Y%m%d_%H%M%S"))
os.mkdir(save_path)
with open(save_path + 'hyperparameters.json', 'w') as f:
    json.dump(hyperparameters, f)

print('LOADING DATA...')
# data_train = load_file('data/LREC/train2012')
# data_valid = load_file('data/LREC/dev2012')
# data_test = load_file('data/LREC/test2011')
# data_test_asr = load_file('data/LREC/test2011asr')
data_train = load_file('data/NPR-podcasts/train')
data_valid = load_file('data/NPR-podcasts/valid')
data_test = load_file('data/NPR-podcasts/test')

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

print('PREPROCESSING DATA...')
X_train, y_train = preprocess_data(data_train, tokenizer, punctuation_enc, segment_size)
X_valid, y_valid = preprocess_data(data_valid, tokenizer, punctuation_enc, segment_size)

print('INITIALIZING MODEL...')
output_size = len(punctuation_enc)
bert_punc = nn.DataParallel(
    BertPunc(segment_size, output_size, dropout).cuda())
    noisy_pattern = np.copy(pattern)
    for i in picks:
        noisy_pattern[i] = pattern[i] * -1
    return noisy_pattern


def calc_acc(original_pattern, predicted_pattern):
    """
    Calculate the accuracy of the model as the fraction of units on which
    the patterns agree. The flipped (inverted) pattern also counts as a
    correct prediction and is reported with a negative sign.

    :param original_pattern: the target pattern
    :param predicted_pattern: the outcome of the model
    :return: accuracy in [-1, 1]
    """
    acc = np.sum(original_pattern == predicted_pattern) / float(
        original_pattern.shape[0])
    negative_pattern = original_pattern * -1
    neg_acc = np.sum(negative_pattern == predicted_pattern) / float(
        original_pattern.shape[0])
    if neg_acc > acc:
        acc = -neg_acc
    return acc


if __name__ == '__main__':
    p = data.load_file()
    part_3_4()
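# Worked check of calc_acc on a toy bipolar pattern: perfect recall scores
# 1.0, recalling the fully inverted pattern scores -1.0 (flipped match),
# and one wrong unit out of four scores 0.75.
target = np.array([1, -1, 1, -1])
print(calc_acc(target, target))                    # 1.0
print(calc_acc(target, -target))                   # -1.0
print(calc_acc(target, np.array([1, 1, 1, -1])))   # 0.75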
    'learning_rate_all': learning_rate_all,
}

train_data_path = "/media/nas/samir-data/punctuation/all_datasets/data_dir_punctuator_v2_wait"
train_data_path2 = "/media/nas/samir-data/punctuation/all_datasets/data_europarl/training-monolingual-europarl"
data_path = "/media/nas/samir-data/punctuation/all_datasets/data_dir_punctuator_v3"

save_path = 'models/{}/'.format(datetime.now().strftime("%Y%m%d_%H%M%S"))
os.mkdir(save_path)
with open(save_path + 'hyperparameters.json', 'w') as f:
    json.dump(hyperparameters, f)

print('LOADING DATA...')
# data_train = load_file(os.path.join(train_data_path2, 'europarl-v7.fr_cleaned.txt'))
# data_train = load_file2(os.path.join(train_data_path, 'cleaned_leMonde_with_punct_v2_for_punctuator.train.txt'), segment_word)
data_train = load_file(
    os.path.join(
        data_path,
        'subset_cleaned_leMonde_with_punct_v2_for_punctuator.train.txt'))
data_valid = load_file(
    os.path.join(
        data_path,
        'subset_cleaned_leMonde_with_punct_v2_for_punctuator.dev.txt'))

tokenizer = CamembertTokenizer.from_pretrained('camembert-base')

print('PREPROCESSING DATA...')
X_train, y_train = encode_data3(data_train, tokenizer, puncs, punctuation_enc, segment_size)
X_valid, y_valid = encode_data3(data_valid, tokenizer, puncs, punctuation_enc, segment_size)

print('INITIALIZING MODEL...')
        if y[i] == 1:
            actual += 1
            if d(X[i], Z) == 0:
                common += 1
    print('No of actual outliers : ', actual)
    print('Precision : ', common / len(Z))
    print('Recall : ', common / actual)
    print('Cost : ', cost(C, X, Z))


if __name__ == "__main__":
    # load the existing data
    if real_data:
        temp_X, temp_Y = load_file(load_data)
        # shuffle features and labels together so the pairing survives;
        # shuffling the two lists independently would scramble the labels
        paired = list(zip(temp_X, temp_Y))
        random.shuffle(paired)
        temp_X, temp_Y = map(list, zip(*paired))
        U, y = removeDups(temp_X, temp_Y)
    # synthetic data
    else:
        U, y, C_, Z_, ids_ = make_data(5, 0, 8, 50)
    # X_train, X_test, y_train, y_test = train_test_split(np.array(temp_X), np.array(temp_Y), test_size=0.33, random_state=42)
    # print(X_test.shape)
    # data is finally in U and labels in y
    # print('u shape ', len(U), ',', len(U[0]))
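# Toy arithmetic for the metrics above (numbers invented for illustration):
# if Z flags 4 points and 3 of them are among 5 actual outliers, then
# precision = 3/4 = 0.75 and recall = 3/5 = 0.6.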
# data_train = load_file('data/LREC/train2012')
# data_valid = load_file('data/LREC/dev2012')
# data_test = load_file('data/LREC/test2011')
# data_test_asr = load_file('data/LREC/test2011asr')

# import os
# wpath = "C:/Users/HP/Google Drive/Colab Notebooks/Punctuation_Restoration/BertPunc-master_GPU/data/"
# os.chdir(wpath)
# print(os.getcwd())

# alternatives: encoding='cp1252', encoding='utf8', errors="replace"
with open("train2012_MAX.txt", 'r', encoding="utf8", errors="ignore") as f:
    data_train = f.readlines()
# data_train = load_file('data/test2011')
data_valid = load_file('data/dev2012')
data_test = load_file('data/test2011')

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

print('PREPROCESSING DATA...')
X_train, y_train = preprocess_data(data_train, tokenizer, punctuation_enc, segment_size)
X_valid, y_valid = preprocess_data(data_valid, tokenizer, punctuation_enc, segment_size)

print('INITIALIZING MODEL...')
output_size = len(punctuation_enc)
bert_punc = nn.DataParallel(
    BertPunc(segment_size, output_size, dropout).cuda())
import data
import algorithm

# $, A, C, G, T, N/X are encoded starting from 0
# forward extension  => W -> Wa
# backward extension => W -> aW
# for the empty string: k = l = 0, s = len(T) - 1
# reference = ["ACCTTGA"]  # the terminator $ is not included

if __name__ == "__main__":
    print('hello, my bwa tools!')
    print('********************** DATA LOAD *************************')
    reference = data.load_file(
        'd:\\short-read-aligment-based-on-the-BWT\\tools\\my_ref.fa')
    # fmd_index = bwt.BWA_FMD_index(reference)
    fmd_index = bwt.BWA_FMD_index_noend(reference)
    print('************************ RESULT **************************')
    # print('B :', fm_index.data['B'])
    # print('S :', fm_index.data['S'])
    # print('C :', fm_index.data['C'])
    # print('O :', fm_index.data['O'])
    # print(fm_index.text)
    # print('B :', fmd_index.data['B'])
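# For orientation, the textbook BWT of the sample reference mentioned above,
# built naively from sorted rotations (this is NOT the project's
# BWA_FMD_index, just an illustrative sketch):
def naive_bwt(text, sentinel='$'):
    # sort all rotations of text + sentinel and read off the last column
    text += sentinel
    rotations = sorted(text[i:] + text[:i] for i in range(len(text)))
    return ''.join(r[-1] for r in rotations)

print(naive_bwt('ACCTTGA'))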
def first_three():
    P = data.load_file()
    params = {
        "epochs": 4000,
        "neurons": 1024,
        "learn_method": 'classic'
    }
    # ====== train ========================================
    p0 = P[0].ravel()
    p1 = P[1].ravel()
    p2 = P[2].ravel()
    train_set = np.vstack((p0, p1, p2))
    Hop = HopfieldNet(train_set)
    Hop.batch_train()
    # get energy per stored pattern
    energy_p0 = Hop.energy(p0, threshold=0)
    energy_p1 = Hop.energy(p1, threshold=0)
    energy_p2 = Hop.energy(p2, threshold=0)
    print('The energy for p0 is: {}'.format(energy_p0))
    print('The energy for p1 is: {}'.format(energy_p1))
    print('The energy for p2 is: {}'.format(energy_p2))
    print('\n')
    # ====== test =========================================
    p10 = P[9].ravel()
    p11 = P[10].ravel()
    # get energy per distorted pattern
    energy_p10 = Hop.energy(p10, threshold=0)
    energy_p11 = Hop.energy(p11, threshold=0)
    print('The energy for distorted p10 is: {}'.format(energy_p10))
    print('The energy for distorted p11 is: {}'.format(energy_p11))
    print('\n')
    recall_set = np.vstack((p10, p11))
    recalled_set, energy = Hop.sequential_recall_shuffle(recall_set, epochs=4)
    rs0_real = P[0]
    rs0_org = P[9]
    rs0 = recalled_set[0, :].reshape(32, 32)
    rs1_real = P[2]
    rs1_org = P[10]
    rs1 = recalled_set[1, :].reshape(32, 32)
    fig, ax = plt.subplots(1, 3)
    ax[0].imshow(rs0_real, origin="lower")
    ax[1].imshow(rs0_org, origin="lower")
    ax[2].imshow(rs0, origin="lower")
    plt.show()
    plt.plot(range(len(energy[0])), energy[0])
    plt.xlabel('Epoch', fontsize=16)
    plt.ylabel('Energy', fontsize=16)
    plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    # plt.savefig('Energy1.png')
    plt.show()
    fig, ax = plt.subplots(1, 3)
    ax[0].imshow(rs1_real, origin="lower")
    ax[1].imshow(rs1_org, origin="lower")
    ax[2].imshow(rs1, origin="lower")
    plt.show()
    plt.plot(range(len(energy[1])), energy[1])
    plt.xlabel('Epoch', fontsize=16)
    plt.ylabel('Energy', fontsize=16)
    plt.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))
    # plt.savefig('Energy2.png')
    plt.show()
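# Hop.energy above presumably evaluates the standard Hopfield Lyapunov
# function; a minimal sketch under that assumption (the project's actual
# implementation is not shown here):
#   E(s) = -1/2 * s^T W s + threshold * sum(s)
def hopfield_energy(W, s, threshold=0.0):
    s = s.ravel()
    return -0.5 * s @ W @ s + threshold * np.sum(s)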
import torch
import torch.nn as nn

from data import load_file
from model import SentimentModel

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

TEXT, LABEL, train, valid, test, train_iter, valid_iter, test_iter = load_file(
    filepath='data/', device=device)

INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1

model = SentimentModel(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
optimizer = torch.optim.SGD(model.parameters(), lr=3e-3)
criterion = nn.BCEWithLogitsLoss()

model = model.to(device)
criterion = criterion.to(device)


def binary_accuracy(preds, y):
    # round sigmoid outputs to 0/1 and compare with the labels
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float()
    acc = correct.sum() / len(correct)
    return acc
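# A minimal training-epoch sketch wiring the pieces above together; the
# batch attribute names (batch.text, batch.label) follow torchtext
# conventions and are assumptions, not confirmed by this excerpt.
def train_epoch(model, iterator, optimizer, criterion):
    model.train()
    epoch_loss, epoch_acc = 0.0, 0.0
    for batch in iterator:
        optimizer.zero_grad()
        predictions = model(batch.text).squeeze(1)
        loss = criterion(predictions, batch.label)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += binary_accuracy(predictions, batch.label).item()
    return epoch_loss / len(iterator), epoch_acc / len(iterator)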
import numpy as np
from glob import glob
from transformers import CamembertTokenizer
import torch
from torch import nn
# %matplotlib inline
import json
from tqdm import tqdm
from sklearn import metrics

from model import BertPunc, BertPunc_ner
from data import (load_file, load_file2, encode_data3, create_data_loader,
                  create_data_loader_without_attentions)

segment_word = 12

data_test = load_file("dev.ester.clean")
# data_test = load_file2("test_ilyes_segment_long1.txt", segment_word)
# data_test = load_file("/media/nas/samir-data/punctuation/all_datasets/data_dir_punctuator_v3/subset_cleaned_leMonde_with_punct_v2_for_punctuator.test.txt")

tokenizer = CamembertTokenizer.from_pretrained('camembert-base')

punctuation_enc = {'PAD': 0, 'TOKEN': 1, ',': 2, '.': 3, '▁?': 4}
# punctuation_enc = {
#     'PAD': 0,
#     'TOKEN': 1,
#     ',': 2,
#     '.': 3,
#     '▁?': 4,
#     '▁:': 5,
#     '▁!': 6,
# filter_lengths = [1, 2, 3, 4, 5, 6, 7]
filter_lengths = [4, 5, 6]
print('Filter lengths:', filter_lengths)
hidden_dims = 250
print('Hidden dims:', hidden_dims)
nb_epoch = 20
embedding_droupout = 0.2
print('Embedding dropout:', embedding_droupout)
fc_dropout = 0.5
print('Fully-connected dropout:', fc_dropout)

# cross validation
n_folds = 10

print('Loading data...')
X_train, y_train, num_classes = load_file(full_train_file, alphabet)
print(len(X_train), 'train sequences')

print('Pad sequences (samples x time)')
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
print('X_train shape:', X_train.shape)
y_train = np.array(y_train)
# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, num_classes)


def make_model(maxlen, alphabet_size, embedding_dims, embedding_droupout,
               nb_filters, filter_lengths, hidden_dims, fc_dropout,
               num_classes):
    print('Build model...')
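# The cross-validation loop itself falls outside this excerpt; a minimal
# sketch of how n_folds could drive it (sklearn's StratifiedKFold and the
# variables embedding_dims / nb_filters are assumptions, not shown above):
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
for fold, (tr_idx, va_idx) in enumerate(skf.split(X_train, y_train)):
    model = make_model(maxlen, len(alphabet), embedding_dims,
                       embedding_droupout, nb_filters, filter_lengths,
                       hidden_dims, fc_dropout, num_classes)
    model.fit(X_train[tr_idx], Y_train[tr_idx],
              validation_data=(X_train[va_idx], Y_train[va_idx]),
              epochs=nb_epoch)  # nb_epoch= in older Keras versions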
os.makedirs(save_path)
with open(save_path + 'hyperparameters.json', 'w') as f:
    json.dump(hyperparameters, f)

print('LOADING DATA...')
# LREC dataset
# data_train = load_file('data/LREC/train2012')
# data_valid = load_file('data/LREC/dev2012')
# data_test = load_file('data/LREC/test2011')
# ASR dataset
# data_test_asr = load_file('data/LREC/test2011asr')
# data_train = load_file('data/NPR-podcasts/train')
# data_valid = load_file('data/NPR-podcasts/valid')
# data_test = load_file('data/NPR-podcasts/test')
# Chinese dataset **************************
data_train = load_file('data/zh_pfdsj/train_proc')
data_valid = load_file('data/zh_pfdsj/dev_proc')
data_test = load_file('data/zh_pfdsj/test_proc')

# directory that holds vocab.txt
# tokenizer = BertTokenizer.from_pretrained('./models/', do_lower_case=True)
# tokenizer = AutoTokenizer.from_pretrained('./models/albert_en/', do_lower_case=True)
# Chinese tokenizer
# tokenizer = BertTokenizer.from_pretrained('./models/albert_chinese_small/', do_lower_case=True)
# DistilBERT
# tokenizer = AutoTokenizer.from_pretrained('./models/bert_distill_chinese', do_lower_case=True)
# NOTE: the ALBERT-small variants (rnn, dense-hidden, dense-rnn) all use the
# same tokenizer:
tokenizer = BertTokenizer.from_pretrained('./models/albert_chinese_small/', do_lower_case=True)