def _extract_archive(archive_path):
    """Unpack the uploaded archive at *archive_path* into UPLOAD_FOLDER."""
    if archive_path.lower().endswith(".rar"):
        patoolib.extract_archive(archive_path, outdir=UPLOAD_FOLDER)
    else:
        with zipfile.ZipFile(archive_path, 'r') as zip_ref:
            zip_ref.extractall(UPLOAD_FOLDER)


def _is_image(name):
    """Return True when *name* has one of the supported image extensions."""
    return name.lower().endswith((".jpg", ".jpeg", ".png"))


def _paths_by_directory(root):
    """Yield (class_name, path) pairs; the class is the image's parent folder."""
    for dirname, _, filenames in os.walk(root):
        # basename() is separator-agnostic, unlike splitting on '\\'.
        class_name = os.path.basename(os.path.normpath(dirname))
        for name in filenames:
            if _is_image(name):
                yield class_name, os.path.join(dirname, name)


def _paths_by_filename(root):
    """Yield (class_name, path) pairs; the class is the letters of the file stem."""
    for name in os.listdir(root):
        if _is_image(name):
            class_name = re.sub("[^a-zA-Z]+", "", name.split(".")[0])
            yield class_name, os.path.join(root, name)


def _image_to_row(path):
    """Load *path*, resize it to 200x200 and flatten it into a (1, n) row."""
    with Image.open(path) as image:
        # LANCZOS is the filter formerly exposed under the ANTIALIAS alias.
        resized = image.resize((200, 200), Image.LANCZOS)
    return np.array(resized).reshape(1, -1)


def _collect_labelled_images(labelled_paths):
    """Build the feature matrix, label list and class list from labelled paths.

    Images whose flattened length differs from the first image (for example a
    different channel count) are skipped so that all rows can be stacked.

    Returns:
        (X, y, classlist): X is a 2-D array with one row per kept image (or
        None when no image was kept), y the list of integer labels and
        classlist a list of ``{'name': ..., 'number': ...}`` dicts.
    """
    rows = []
    labels = []
    classes = {}
    classlist = []
    row_size = None
    for class_name, path in labelled_paths:
        row = _image_to_row(path)
        if row_size is None:
            row_size = row.shape[1]
        elif row.shape[1] != row_size:
            continue
        if class_name not in classes:
            classlist.append({'name': class_name, 'number': len(classes)})
            classes[class_name] = len(classes)
        rows.append(row)
        labels.append(classes[class_name])
    features = np.vstack(rows) if rows else None
    return features, labels, classlist


def _build_committee(X_train, y_train, query_strategy):
    """Create the three-member committee (forest, k-NN, tree) on the full train set."""
    members = [
        ActiveLearner(estimator=estimator_cls(), X_training=X_train, y_training=y_train)
        for estimator_cls in (RandomForestClassifier, KNeighborsClassifier, DecisionTreeClassifier)
    ]
    return Committee(learner_list=members, query_strategy=query_strategy)


def query():
    """Handle the upload form: unpack the archive, load images, start active learning.

    Reads from the request: the uploaded ``file``, and the form fields
    ``strategy_select``, ``classifier_select``, ``structure_select`` (0 means
    one sub-folder per class, anything else means the class is encoded in the
    file name) and ``queries``.

    Returns:
        Whatever ``helper()`` returns once the ``Data`` object has been built,
        or a ``(message, 400)`` pair when no image could be loaded or the
        selected strategy is not recognised.
    """
    upload = request.files['file']
    filename = secure_filename(upload.filename)
    if upload and allowed_file(upload.filename):
        archive_path = os.path.join(UPLOAD_FOLDER, filename)
        upload.save(archive_path)
        _extract_archive(archive_path)
        print("Succesfull")

    strategy_name = str(request.form.get('strategy_select'))
    classifier_name = str(request.form.get('classifier_select'))
    option = int(request.form.get('structure_select'))
    n_queries = request.form['queries']

    # The archive is expected to unpack into a folder named after its stem.
    dataset_root = os.path.join(UPLOAD_FOLDER, filename.split(".")[0])
    if option == 0:
        labelled_paths = _paths_by_directory(dataset_root)
    else:
        labelled_paths = _paths_by_filename(dataset_root)
    X, y, classlist = _collect_labelled_images(labelled_paths)
    if X is None:
        return "No usable images found in the uploaded archive", 400

    X_train, X_test, y_train, y_test = train_test_split(X, y)

    # Never ask for more seed samples than the training split contains.
    n_initial = min(100, len(X_train))
    initial_idx = np.random.choice(len(X_train), size=n_initial, replace=False)
    X_initial = X_train[initial_idx]
    # Labels must be picked with the same indices as the features.
    y_initial = [y_train[i] for i in initial_idx]
    X_pool = np.delete(X_train, initial_idx, axis=0)
    y_pool = np.delete(y_train, initial_idx, axis=0)

    # strategy name -> (query strategy, training features, training labels)
    single_learner_strategies = {
        'Uncertainty Sampling': (uncertainty_sampling, X_initial, y_initial),
        'Entropy Sampling': (entropy_sampling, X_initial, y_initial),
        'Random Sampling': (random_sampling, X_train, y_train),
    }
    committee_strategies = {
        'Query By Committee(Vote Entropy Sampling)': vote_entropy_sampling,
        'Query By Committee(Uncertainty Sampling)': uncertainty_sampling,
        'Query By Committee(Max Disagreement Sampling)': max_disagreement_sampling,
        'Query By Committee(Max STD Sampling)': max_std_sampling,
        'Query By Committee(Consensus Entropy Sampling)': consensus_entropy_sampling,
    }

    learner = None
    committee = None
    if strategy_name in single_learner_strategies:
        if classifier_name == 'Random Forest':
            classifier = RandomForestClassifier()
        elif classifier_name == 'KNN':
            classifier = KNeighborsClassifier()
        else:
            classifier = DecisionTreeClassifier()
        strategy, X_fit, y_fit = single_learner_strategies[strategy_name]
        learner = ActiveLearner(
            estimator=classifier,
            query_strategy=strategy,
            X_training=X_fit,
            y_training=y_fit,
        )
        model = learner
    elif strategy_name in committee_strategies:
        committee = _build_committee(X_train, y_train, committee_strategies[strategy_name])
        model = committee
    else:
        return "Unknown query strategy", 400

    accuracy = [model.score(X_test, y_test)]
    print(accuracy[0])
    # NOTE(review): helper() takes no arguments, so Data presumably publishes
    # this state somewhere helper() can read it - confirm against Data.
    data = Data(n_queries, X_pool, y_pool, learner, committee, accuracy,
                X_test, y_test, classlist, n_queries)
    return helper()
def parse_parser_results_new(text):
    """Parse the raw console output of the CoreNLP command-line tools.

    The text is split on "\\r\\n" and walked with a small state machine
    (STATE_TEXT, STATE_WORDS, STATE_TREE, STATE_DEPENDENCY,
    STATE_COREFERENCE plus two error-recovery states). One ``Data`` object is
    created per "Sentence #" header and filled with the sentence text, its
    tokens and its coreference pairs; dependency lines are skipped and the
    parse tree is joined but not stored.

    Updated for a newer version of Stanford CoreNLP -- 2015.

    Args:
        text: the parser output as a single string.

    Returns:
        The list of ``Data`` objects, one per parsed sentence.
    """
    data_list = []
    data = None
    lastline = None
    following_line = None
    state = STATE_START
    #for line in re.split("\r\n(?![^\[]*\])",text):
    seqs = re.split("\r\n", text)
    i = 0
    #for line in re.split("\r\n", text):
    # A manual index is used because several branches consume more than one
    # line, or step back one line, per iteration.
    while i < len(seqs):
        line = seqs[i]
        line = line.strip()
        if line.startswith('NLP>'):  # end
            if data:
                data_list.append(data)  # add last one
            break
        if line.startswith("Sentence #"):
            # A new sentence header closes the previous sentence.
            if data:
                data_list.append(data)
            data = Data()
            if SENTENCE_NO_PATTERN.match(line):
                state = STATE_TEXT
            else:
                # Header did not match; keep it so it can be glued to the
                # next line in STATE_SENT_ERROR.
                lastline = line
                state = STATE_SENT_ERROR
            i += 1
        elif state == STATE_SENT_ERROR:
            line = lastline + line
            assert SENTENCE_NO_PATTERN.match(line) is not None
            state = STATE_TEXT
            i += 1
        elif state == STATE_TEXT_ERROR:
            # Reached after stepping back one line from STATE_WORDS: join the
            # current line with the stray line saved in following_line.
            line = line + following_line
            data.addText(line)
            state = STATE_WORDS
            i += 2
        elif state == STATE_TEXT:
            Data.newSen()
            data.addText(line)
            state = STATE_WORDS
            i += 1
        elif state == STATE_WORDS:
            if len(line) == 0:
                # NOTE(review): i is not advanced before this continue, so an
                # empty line in this state looks like it would loop forever -
                # confirm.
                continue
            if not line.startswith("[Text="):
                #raise Exception('Parse error. Could not find "[Text=" in: %s' % line)
                print >> sys.stderr, 'Parse error. Could not find "[Text=" in: %s' % line
                print >> sys.stderr, 'Attempt to fixing error.'
                # Save the stray line and step back so STATE_TEXT_ERROR can
                # re-read the previous line together with it.
                following_line = line
                state = STATE_TEXT_ERROR
                i -= 1
                continue
            #for s in WORD_PATTERN.findall(line):
            wline = line
            # Consume consecutive token lines. seqs[i] is read without a
            # bounds check, so the input is assumed not to end on a token line.
            while WORD_PATTERN.match(wline):
                t = parse_bracketed(wline[1:-1])
                if t[0] == '':
                    # Skip entries whose first parsed field is empty.
                    i += 1
                    wline = seqs[i]
                    continue
                data.addToken(t[0], t[1][u'CharacterOffsetBegin'], t[1][u'CharacterOffsetEnd'], t[1][u'Lemma'], t[1][u'PartOfSpeech'], t[1][u'NamedEntityTag'])
                i += 1
                wline = seqs[i]
            if WORD_ERROR_PATTERN.match(wline):  # handle format error
                # The token entry was split over two lines; re-join them.
                wline = wline + seqs[i + 1]
                wline = wline.strip()
                t = parse_bracketed(wline[1:-1])
                data.addToken(t[0], t[1][u'CharacterOffsetBegin'], t[1][u'CharacterOffsetEnd'], t[1][u'Lemma'], t[1][u'PartOfSpeech'], t[1][u'NamedEntityTag'])
                i += 2
                state = STATE_WORDS
                continue
            # i is deliberately left unchanged: the first non-token line is
            # handled as the first tree line on the next iteration.
            state = STATE_TREE
            parsed = []
        elif state == STATE_TREE:
            if len(line) == 0:
                # A blank line ends the tree section.
                state = STATE_DEPENDENCY
                parsed = " ".join(parsed)
                i += 1
                #data.addTree(Tree.parse(parsed))
            else:
                parsed.append(line)
                i += 1
        elif state == STATE_DEPENDENCY:
            # Dependency lines are ignored; a blank line ends the section.
            if len(line) == 0:
                state = STATE_COREFERENCE
            else:
                pass
                ''' # don't need here split_entry = re.split("\(|, ", line[:-1]) if len(split_entry) == 3: rel, l_lemma, r_lemma = split_entry m = re.match(r'(?P<lemma>.+)-(?P<index>[^-]+)', l_lemma) l_lemma, l_index = m.group('lemma'), m.group('index') m = re.match(r'(?P<lemma>.+)-(?P<index>[^-]+)', r_lemma) r_lemma, r_index = m.group('lemma'), m.group('index') data.addDependency( rel, l_lemma, r_lemma, l_index, r_index) '''
            i += 1
        elif state == STATE_COREFERENCE:
            if "Coreference set" in line:
                #if 'coref' not in results:
                #    results['coref'] = []
                # The list is registered first and filled in place by the
                # lines that follow.
                coref_set = []
                data.addCoref(coref_set)
            else:
                # NOTE(review): coref_set is only bound after a
                # "Coreference set" line has been seen - confirm the input
                # always provides one before the first match.
                for src_i, src_pos, src_l, src_r, sink_i, sink_pos, sink_l, sink_r, src_word, sink_word in CR_PATTERN.findall(
                        line):
                    src_i, src_pos, src_l, src_r = int(src_i), int(
                        src_pos), int(src_l), int(src_r)
                    sink_i, sink_pos, sink_l, sink_r = int(sink_i), int(
                        sink_pos), int(sink_l), int(sink_r)
                    coref_set.append(
                        ((src_word, src_i, src_pos, src_l, src_r),
                         (sink_word, sink_i, sink_pos, sink_l, sink_r)))
            i += 1
        else:
            # Any other state (e.g. STATE_START before the first header).
            i += 1
    return data_list
# -*- coding: utf-8 -*- from sys import path path.append('./data') path.append('./methods') from data import Data from svm import SVM from ann import ANN from nb import NaiveBayes from time import time import graph data = Data("mushrooms.csv") method = ANN(data) i = time() method.train() method.predict() tempo = time() - i result = method.getPercentage() print 'Tempo (ms):', tempo print 'Taxa de acerto:', result print '' data = Data("mushrooms.csv") method = SVM(data) i = time() method.train() method.predict() tempo = time() - i result = method.getPercentage()
complete_data = [] with open(self.path, encoding='cp932', errors='ignore') as f: reader = csv.DictReader(f) for line in reader: context = [ line['InputSentence1'], line['InputSentence2'], line['InputSentence3'], line['InputSentence4'] ] option_0 = line['RandomFifthSentenceQuiz1'] option_1 = line['RandomFifthSentenceQuiz2'] label = int(line['AnswerRightEnding']) - 1 complete_data.append({ 'context': context, 'options': [option_0, option_1], 'label': label }) return complete_data if __name__ == '__main__': from data import Data train_data = Data('data/train2017.csv').get_train_data() print(train_data[-1], len(train_data)) valid_2016_data = Data('data/valid2016.csv').get_validtest_data() valid_2018_data = Data('data/valid2018.csv').get_validtest_data() test_data = Data('data/test2016.csv').get_validtest_data() print(valid_2016_data[-1], len(valid_2016_data)) print(valid_2018_data[-1], len(valid_2018_data)) print(test_data[-1], len(test_data))
def gen_data():
    """Return a shuffled list of ``csts.LEN`` ``Data`` items.

    Item k (for k = 1 .. csts.LEN) is built from ``k * step`` where ``step``
    is ``csts.HEIGHT / (csts.LEN + 1)``; the list is then shuffled in place
    with ``random.shuffle``.
    """
    step = csts.HEIGHT / (csts.LEN + 1)
    items = []
    for k in range(1, csts.LEN + 1):
        items.append(Data(k * step))
    random.shuffle(items)
    return items
# For each of the five folds: build `size` trees on random feature subsets of
# the combined folds, then start re-formatting the single fold with the same
# feature subsets. The remainder of the loop body lies outside this view.
for j in range(5):
    combineFoldName = combineFoldsNames[j]
    singleFoldName = singleFoldNames[j]
    _indexCollection = []  # feature-index list used for each tree
    _root = []  # root of each tree
    X_train, y_train, num_features = read_libsvm(combineFoldName)
    x = X_train.todense()
    for y in range(size):
        # Draw 50 feature indices in [0, 255]; duplicates are possible.
        _index = []
        for z in range(50):
            _index.append(random.randint(0, 255))
        # formatFile is assumed to write the file 'fileFormated' that is read
        # back on the next line - TODO confirm.
        formatFile(x, y_train, _index, 799)
        trainData = np.loadtxt('fileFormated', delimiter=',', dtype=str)
        trainData_obj = Data(data=trainData)
        attributesSet = trainData_obj.attributes
        root = id3Depth(attributesSet, trainData_obj, 1)
        _root.append(root)
        _indexCollection.append(_index)
    _predictions = []
    full = True
    # Same preparation for the single fold, reusing each tree's feature list.
    X_train, y_train, num_features = read_libsvm(singleFoldName)
    x = X_train.todense()
    for z in range(size):
        formatFile(x, y_train, _indexCollection[z], 867598)
        trainData = np.loadtxt('fileFormated', delimiter=',', dtype=str)
        trainData_obj = Data(data=trainData)
def getCashFlow(strid):
    """Build the cash-flow line chart for column *strid*.

    Three statement items (operating, investing and financing cash flow) are
    fetched from ``Data``, their dates are mapped through
    ``modifMonthtoSeason`` and every row after the first has the previous
    row's original value subtracted. A fourth series is derived row by row as
    operating minus the magnitude of investing, to which the financing series
    is then added.

    Args:
        strid: column label selected from each fetched table.

    Returns:
        A ``go.Figure`` holding the four traces.
    """
    data = Data()
    sheet_len = len(data.dates['cashFlowsSheet'])

    def _per_period(item_name):
        # Fetch one item and turn its values into row-over-row differences.
        table = data.get(item_name, sheet_len)[strid].to_frame()
        table = table.reset_index()
        table['date'] = table['date'].apply(modifMonthtoSeason)
        untouched = table.copy()
        for row in range(1, len(table)):
            table.at[row, strid] = table.at[row, strid] - untouched.at[row - 1, strid]
        table.set_index('date', inplace=True)
        return table

    operating = _per_period('營業活動之淨現金流入(流出)')
    investing = _per_period('投資活動之淨現金流入(流出)')
    financing = _per_period('籌資活動之淨現金流入(流出)')

    # Net series: operating minus |investing|, then plus financing.
    flat_operating = operating.reset_index()
    flat_investing = investing.reset_index()
    flat_net = operating.copy().reset_index()
    for row in range(0, len(operating)):
        spent = flat_investing.at[row, strid]
        earned = flat_operating.at[row, strid]
        if spent < 0:
            flat_net.at[row, strid] = earned + spent
        else:
            flat_net.at[row, strid] = earned - spent
    flat_net.set_index('date', inplace=True)
    net = flat_net.add(financing, axis=0)

    traces = (
        (operating, '營業現金流', '%{x}營業現金流= %{y:$,}'),
        (investing, '投資現金流', "%{x}投資現金流= %{y:$,}"),
        (financing, '籌資現金流', '%{x}籌資現金流= %{y:$,}'),
        (net, '淨現金流', '%{x}淨現金流= %{y:$,}'),
    )

    fig = go.Figure()
    for table, label, hover in traces:
        fig.add_trace(go.Scatter(x=table.index,
                                 y=table[strid],
                                 name=label,
                                 hovertemplate=hover))
    fig.update_layout(
        plot_bgcolor='#36404A',
        paper_bgcolor='#36404A',
        font_color='#7FDBFF',
        xaxis={'title': '季度'},
        yaxis={'title': '千元'}
    )
    return fig
# Section 2.1: show the head of the raw frame and two basic counts.
st.write('')
st.subheader('2.1 Peek at the raw data')
st.write('**The first 5 rows of the raw data:**')
st.write(data_df.head())
st.write('')
st.write('**Some basic statistics:**')
st.write('Number of data points =', len(data_df.index))
# One column is excluded from the feature count (presumably the target
# column - confirm).
st.write('Number of features =', len(data_df.columns) - 1)

################################################################################
# Section 2.2: wrap the frame in Data and pull out the processed pieces.
st.write('')
st.subheader('2.2 Exploration & Processing')
data = Data(data_df, sensitive_features, target_feature, pos_target)
# From here on data_df refers to the frame held by the Data object.
data_df = data.data_df
bias_cols = data.bias_cols
target_col = data.target_col
feature_cols = data.feature_cols
bias_col_types = data.bias_col_types
categories = data.categories

# Save our processed data
data_df.to_csv('../data/processed/' + filename, index=False)
write_params_to_file(bias_cols, target_col, bias_col_types, categories)

################################################################################
st.write('')
st.subheader('2.3 Post-processing exploration')
print "\nclass probs:" for c in self.clssprobs: print c, self.clssprobs[c] print "\nattr probs:" for c in self.clssprobs: print "\nclass", c, ":" for a in sorted(self.attrcnts): print a, self.condprobs[a, c] if __name__ == "__main__": from confmat import ConfMat filename = "ds/weatherNominalTr.txt" d = Data(filename) ## d.report() pr = MaxAPost(d) pr.train() cm = ConfMat(pr.clsscnts) ## print for (v, c_true) in d.test_set: c_pred = pr.predict(v)[0] ## print v, c_pred, "( true class:", c_true, ")" cm.mat[c_pred, c_true] += 1 print ## pr.show() ## print cm.report()
args = parser.parse_args() # set arguments l_r = args.l_r batch_size = args.batch_size pickle_dir = args.pickle_dir max_seq = args.max_seq epochs = args.epochs is_reuse = args.is_reuse load_path = args.load_path save_path = args.save_path multi_gpu = args.multi_gpu # load data dataset = Data('dataset/processed') print(dataset) # load model learning_rate = callback.CustomSchedule(par.embedding_dim) opt = Adam(l_r, beta_1=0.9, beta_2=0.98, epsilon=1e-9) # define model mt = MusicTransformer(embedding_dim=256, vocab_size=par.vocab_size, num_layer=6, max_seq=max_seq, dropout=0.2, debug=False, loader_path=load_path) mt.compile(optimizer=opt, loss=callback.transformer_dist_train_loss)
from visualisation import plt_scores_lambs from visualisation import plt_pred_obser from visualisation import plt_residu_lambs from visualisation import plt_scores_datasize from visualisation import plt_square_lambs from visualisation import plt_coefs_coefs from sklearn.decomposition import PCA from model_selection import _k_split from visualisation import create_plots import time from data import Data # Paste the dataset location below location = 'testdata_rgb.txt' d = Data() # X, y = d.boston() X, y = d.Residuals_Match_DMhydro_Less_z() X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666) method = None while method == None: l = input( 'What method do you want to use ?\n a Batch Gradient Descent\n b Stochastic Gradient descent\n c ' 'Mini-batch ' 'Gradient Descent\n d Ordinary Least Squares Solution\n e Coordinate Descent\n f Accelerated Proximal Gradient Descent\n g Alternating Direction Method of Multipliers ' ) if l == "a": method = "bgd" elif l == "b":
from data import Data from subset import PrivateDomain from model import sess_runner #main_dataset = Data(log=False, # filename='../data/3732_filtered.txt', # batch_size=50, # sep=' ') main_dataset = Data(filename='../data/3732_filtered.txt', split=True, split_start=800, #additional_info='../data/gse80655_annotation.txt', batch_size=50, sep=' ', log=False) supporting_dataset = Data(filename='../data/3732_filtered.txt', split=True, split_end=800, ind=1, #additional_info='../data/gse80655_annotation.txt', batch_size=50, sep=' ', log=False) def runner(): model = [PrivateDomain(main_dataset, delay=1, tagged=False)] model.append(PrivateDomain(supporting_dataset,
# Python 2 demo driver: train NaiveBayes on a data file and compute the ROC
# curve for one positive class.
if __name__ == "__main__":
    from naivebayes import NaiveBayes
    from data import Data
    print_numbers = False  # set to True to also call prnb.show()
    datafile = "ds/titanicTr.txt"
    pos_class = "Survived:Yes"
    #pos_class = "Survived:No"
    # datafile = "cmcTr.txt"
    # pos_class = "contraceptive-method:none"
    d = Data(datafile)
    prnb = NaiveBayes(d)
    prnb.train()
    r = Roc(prnb, pos_class)
    r.do_curve()
    # The trailing comma keeps the next print on the same output line.
    print "Predicting", pos_class, "for data file", datafile,
    # r.curve[2] and r.curve[3] are reported as the positive and negative
    # instance counts.
    print "with", int(r.curve[2]), "positive instances and", int(
        r.curve[3]), "negative instances"
    if print_numbers:
        prnb.show()
def saveStats(self):
    """Snapshot per-player and per-clan statistics to timestamped CSV files.

    For every clan read from 'ClanList', each member's stats are written to
    ``<clan>/<player name>/<timestamp>.csv`` and the clan averages to
    ``<clan>/<timestamp>.csv``. A clan whose member list is missing or empty
    gets all-zero averages instead of raising.

    Returns:
        0, always.
    """
    dt = Data()
    api = API()
    ut = Util()
    stats = Stats()
    filename = str(ut.getGMTTime()) + ".csv"
    clanlist = dt.read('', 'ClanList')
    for clan in clanlist:
        clanID = api.getClanID(clan[0])
        players = api.getClanMembers(clanID)
        if players is None:
            # Guard before len(): the member lookup may yield None.
            players = []
        clanname = api.getClanTag(clanID)
        playernum = len(players)

        # Running sums, in output order: pr, battles, dmg, kills, wr,
        # spotting dmg, potential dmg.
        totals = [0.0, 0, 0.0, 0.0, 0.0, 0.0, 0.0]

        for player in players:
            name = api.getPlayerName(player)
            pr = stats.PRcalculate(player)
            bt = api.getPlayerBattles(player)
            if bt == 0:
                # NOTE(review): this stops processing the remaining members
                # of the clan, not just this player - confirm that `continue`
                # was not intended.
                break
            avgdmg = api.getPlayerAvgDmg(player)
            avgwr = api.getPlayerAvgWR(player)
            avgkills = api.getPlayerAvgKills(player)
            avgspdmg = api.getPlayerAvgSpottingDmg(player)
            avgptdmg = api.getPlayerAvgPotentialDmg(player)

            player_values = [pr, bt, avgdmg, avgkills, avgwr, avgspdmg, avgptdmg]
            totals = [acc + value for acc, value in zip(totals, player_values)]

            # Every value is written as its own single-element row.
            data = [[name], [player]] + [[value] for value in player_values]
            temppath = str(clan[0]) + "/" + str(name)
            print(temppath + " " + filename)
            print(data)
            dt.write(temppath, filename, data)

        if playernum:
            averages = [total / playernum for total in totals]
        else:
            averages = [0.0] * len(totals)

        data2 = [[int(clanID)], [clanname]]
        for position, average in enumerate(averages):
            # Average battle count (position 1) is stored as an int.
            data2.append([int(average) if position == 1 else float(average)])
        dt.write(str(clan[0]), filename, data2)
    return 0
""" 存放了一些测试中产生的代码 Stored some code generated in the test # @Author : Tian Xiao """ from LassoRegression import LassoRegression from data import Data import numpy as np from model_selection import train_test_split X, y = Data().multi_data_boston() def poly_test(X, y, degree=1): X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666) poly_reg = LassoRegression(degree=degree) poly_reg.fit(X_train, y_train, lasso=False, method="normal") print(poly_reg.score(X_test, y_test)) X_test = X_test[:5] y_predict = poly_reg.predict(X_test) y_true = y_test[:5] for i in range(len(y_true)): print(y_true[i], y_predict[i]) print() def lasso_test(X, y, degree=1): X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666) lasso_reg = LassoRegression(degree=degree) lasso_reg.fit(X_train, y_train, lasso=True, method="bgd") print(lasso_reg.score(X_test, y_test)) X_test = X_test[:5]
def main(out_file='output/result.json',
         model_config='config/rnn_config.json'):
    """Run the trained model on the configured test file and write the entities.

    The test file, model directory, model type, batch size and maximum
    sequence length are all read from the JSON config. Runs on CUDA when
    available, otherwise on CPU.

    Args:
        out_file: path of the JSON file to write; it receives a list of
            ``{'id': ..., 'entities': [...]}`` items, where each entity is
            rendered as ``word-type``, entities of type 's' are dropped and
            duplicates are removed while keeping first-seen order.
        model_config: path of the JSON model configuration.
    """
    # 0. Load config
    with open(model_config) as fin:
        config = json.load(fin, object_hook=lambda d: SimpleNamespace(**d))
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # 1. Load data
    data = Data(vocab_file=os.path.join(config.model_path, 'vocab.txt'),
                max_seq_len=config.max_seq_len,
                model_type=config.model_type,
                config=config)
    test_set, sc_list, _label_list = data.load_file(config.test_file_path,
                                                    train=False)
    token_list = [data.tokenizer.convert_ids_to_tokens(line)
                  for line in sc_list]
    data_loader_test = DataLoader(test_set,
                                  batch_size=config.batch_size,
                                  shuffle=False)

    # 2. Load model
    model = MODEL_MAP[config.model_type](config)
    model = load_torch_model(model,
                             model_path=os.path.join(config.model_path,
                                                     'model.bin'))
    model.to(device)

    # 3. Evaluate
    answer_list, length_list = evaluate(model,
                                        data_loader_test,
                                        device,
                                        isTest=True)

    # The sample ids come from the raw test file, in file order.
    with open(config.test_file_path, 'r', encoding='utf-8') as fin:
        test_json = json.load(fin)
    id_list = [item['id'] for item in test_json]

    mod_tokens_list = handy_tool(token_list, length_list)
    result = [
        result_to_json(t, s) for t, s in zip(mod_tokens_list, answer_list)
    ]

    result_list = []
    for sample_id, item in zip(id_list, result):
        words = [
            d['word'] + "-" + d['type'] for d in item['entities']
            if d['type'] != 's'
        ]
        # dict.fromkeys drops duplicates while preserving first-seen order.
        result_list.append({
            'id': sample_id,
            'entities': list(dict.fromkeys(words)),
        })

    # 4. Write answers to file (opened only once the results are complete,
    # so a failure above does not leave a truncated output file).
    with open(out_file, 'w', encoding='utf8') as fout:
        json.dump(result_list, fout, ensure_ascii=False, indent=4)
def infer(flowtron_path, waveglow_path, text, speaker_id, n_frames, sigma,
          seed):
    """Synthesize *text* with Flowtron + WaveGlow and report stage timings.

    Saves one attention/mel plot per attention layer as
    ``sid{speaker_id}_sigma{sigma}_attnlayer{k}.png``, writes the normalized
    audio to ``sid{speaker_id}_sigma{sigma}.wav`` and prints a timing summary.
    Requires a CUDA device.

    Args:
        flowtron_path: Flowtron checkpoint; either holds a 'model' object or
            a 'state_dict'.
        waveglow_path: WaveGlow checkpoint holding a 'model' object.
        text: text to synthesize.
        speaker_id: speaker passed to the dataset's ``get_speaker_id``.
        n_frames: number of frames of the sampled residual.
        sigma: scale applied to the sampled residual.
        seed: seed for the CPU and CUDA random generators.
    """

    def _timestamp():
        # CUDA work is queued asynchronously; wait for it to finish so the
        # wall-clock reading is attributed to the stage that launched it.
        torch.cuda.synchronize()
        return time.time()

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # load waveglow
    waveglow = torch.load(waveglow_path)['model'].cuda().eval()
    waveglow.cuda().half()
    for k in waveglow.convinv:
        # The invertible convolutions are kept in float32.
        k.float()
    waveglow.eval()

    # load flowtron
    model = Flowtron(**model_config).cuda()
    cpt_dict = torch.load(flowtron_path)
    if 'model' in cpt_dict:
        dummy_dict = cpt_dict['model'].state_dict()
    else:
        dummy_dict = cpt_dict['state_dict']
    model.load_state_dict(dummy_dict)
    model.eval()
    print("Loaded checkpoint '{}')".format(flowtron_path))

    # The dataset object is only used for its text / speaker encoders.
    ignore_keys = ['training_files', 'validation_files']
    trainset = Data(
        data_config['training_files'],
        **dict((k, v) for k, v in data_config.items()
               if k not in ignore_keys))

    tic_prep = _timestamp()

    str_text = text
    num_char = len(str_text)
    num_word = len(str_text.split())

    speaker_vecs = trainset.get_speaker_id(speaker_id).cuda()
    text = trainset.get_text(text).cuda()
    # Add a leading batch dimension of size 1.
    speaker_vecs = speaker_vecs[None]
    text = text[None]

    toc_prep = _timestamp()

    # Warm-up pass so one-time start-up cost is excluded from the measured run.
    with torch.no_grad():
        tic_warmup = _timestamp()
        residual = torch.cuda.FloatTensor(1, 80, n_frames).normal_() * sigma
        mels, attentions = model.infer(residual, speaker_vecs, text)
        toc_warmup = _timestamp()

    # Measured run.
    with torch.no_grad():
        tic_residual = _timestamp()
        residual = torch.cuda.FloatTensor(1, 80, n_frames).normal_() * sigma
        toc_residual = _timestamp()
        mels, attentions = model.infer(residual, speaker_vecs, text)
        toc_flowtron = _timestamp()

    for k in range(len(attentions)):
        attention = torch.cat(attentions[k]).cpu().numpy()
        fig, axes = plt.subplots(1, 2, figsize=(16, 4))
        # 'lower' is the documented value for a bottom-left origin.
        axes[0].imshow(mels[0].cpu().numpy(), origin='lower', aspect='auto')
        axes[1].imshow(attention[:, 0].transpose(),
                       origin='lower',
                       aspect='auto')
        fig.savefig('sid{}_sigma{}_attnlayer{}.png'.format(
            speaker_id, sigma, k))
        plt.close("all")

    tic_waveglow = _timestamp()
    audio = waveglow.infer(mels.half(), sigma=0.8).float()
    toc_waveglow = _timestamp()

    audio = audio.cpu().numpy()[0]
    # normalize audio for now
    audio = audio / np.abs(audio).max()

    len_audio = len(audio)
    # NOTE(review): duration and frame count assume 22050 Hz audio and 256
    # samples per mel frame, while the file is written with
    # data_config['sampling_rate'] - confirm they agree.
    dur_audio = len_audio / 22050
    num_frames = int(len_audio / 256)

    dur_prep = toc_prep - tic_prep
    dur_residual = toc_residual - tic_residual
    dur_flowtron_in = toc_flowtron - toc_residual
    dur_warmup = toc_warmup - tic_warmup
    dur_flowtron_out = toc_flowtron - tic_residual
    dur_waveglow = toc_waveglow - tic_waveglow
    dur_total = dur_prep + dur_flowtron_out + dur_waveglow

    # Reported as seconds of audio generated per second of processing.
    RTF = dur_audio / dur_total

    str_text = "\n text : " + str_text
    str_num = "\n text {:d} char {:d} words  ".format(num_char, num_word)
    str_audio = "\n generated audio : {:2.3f} samples  {:2.3f} sec  with  {:d} mel frames ".format(
        len_audio, dur_audio, num_frames)
    str_perf = "\n total time {:2.3f} = text prep {:2.3f} + flowtron{:2.3f} + wg {:2.3f}  ".format(
        dur_total, dur_prep, dur_flowtron_out, dur_waveglow)
    str_flow = "\n total flowtron {:2.3f} = residual cal {:2.3f} + flowtron {:2.3f}  ".format(
        dur_flowtron_out, dur_residual, dur_flowtron_in)
    str_rtf = "\n RTF is {:2.3f} x  with warm up {:2.3f} ".format(
        RTF, dur_warmup)

    print(str_text, str_num, str_audio, str_perf, str_flow, str_rtf)

    write("sid{}_sigma{}.wav".format(speaker_id, sigma),
          data_config['sampling_rate'], audio)
def main():
    """Parse command-line options, build the model and run the experiment.

    Steps: parse arguments into an ``Option``; load the dataset (``Data`` or
    ``DataPlus`` depending on ``--query_is_language``); create the experiment
    and checkpoint directories; build the ``Learner``; then, inside a
    TensorFlow session, optionally restore a checkpoint, train, produce test
    predictions, extract rules and dump vocabulary embeddings.

    Invalid option combinations are reported through ``parser.error`` (usage
    message, exit status 2) rather than ``assert``, so the checks still run
    under ``python -O``.
    """
    parser = argparse.ArgumentParser(description="Experiment setup")
    # misc
    parser.add_argument('--seed', default=33, type=int)
    parser.add_argument('--gpu', default="", type=str)
    parser.add_argument('--no_train', default=False, action="store_true")
    parser.add_argument('--from_model_ckpt', default=None, type=str)
    parser.add_argument('--no_rules', default=False, action="store_true")
    parser.add_argument('--rule_thr', default=1e-2, type=float)
    parser.add_argument('--no_preds', default=False, action="store_true")
    parser.add_argument('--get_vocab_embed',
                        default=False,
                        action="store_true")
    parser.add_argument('--exps_dir', default=None, type=str)
    parser.add_argument('--exp_name', default=None, type=str)
    # data property
    parser.add_argument('--datadir', default=None, type=str)
    parser.add_argument('--resplit', default=False, action="store_true")
    parser.add_argument('--no_link_percent', default=0., type=float)
    parser.add_argument('--type_check', default=False, action="store_true")
    parser.add_argument('--domain_size', default=128, type=int)
    parser.add_argument('--no_extra_facts',
                        default=False,
                        action="store_true")
    parser.add_argument('--query_is_language',
                        default=False,
                        action="store_true")
    parser.add_argument('--vocab_embed_size', default=128, type=int)
    # model architecture
    parser.add_argument('--num_step', default=3, type=int)
    parser.add_argument('--num_layer', default=1, type=int)
    parser.add_argument('--rnn_state_size', default=128, type=int)
    parser.add_argument('--query_embed_size', default=128, type=int)
    # optimization
    parser.add_argument('--batch_size', default=64, type=int)
    parser.add_argument('--print_per_batch', default=3, type=int)
    parser.add_argument('--max_epoch', default=10, type=int)
    parser.add_argument('--min_epoch', default=5, type=int)
    parser.add_argument('--learning_rate', default=0.001, type=float)
    parser.add_argument('--no_norm', default=False, action="store_true")
    parser.add_argument('--thr', default=1e-20, type=float)
    parser.add_argument('--dropout', default=0., type=float)
    # evaluation
    parser.add_argument('--get_phead', default=False, action="store_true")
    parser.add_argument('--adv_rank', default=False, action="store_true")
    parser.add_argument('--rand_break', default=False, action="store_true")
    parser.add_argument('--accuracy', default=False, action="store_true")
    parser.add_argument('--top_k', default=10, type=int)

    d = vars(parser.parse_args())
    option = Option(d)

    # Tag the run with the experiment name, or a timestamp when none is given.
    if option.exp_name is None:
        option.tag = time.strftime("%y-%m-%d-%H-%M")
    else:
        option.tag = option.exp_name

    # Validate option combinations up front, before any data is loaded.
    if option.resplit and option.no_extra_facts:
        parser.error("--resplit cannot be combined with --no_extra_facts")
    if option.accuracy and option.top_k != 1:
        parser.error("--accuracy requires --top_k 1")
    if option.exps_dir is None:
        # os.path.join(None, ...) below would otherwise raise a TypeError
        # only after the dataset has been loaded.
        parser.error("--exps_dir is required")

    os.environ["CUDA_VISIBLE_DEVICES"] = option.gpu
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

    if not option.query_is_language:
        data = Data(option.datadir, option.seed, option.type_check,
                    option.domain_size, option.no_extra_facts)
    else:
        data = DataPlus(option.datadir, option.seed)
    print("Data prepared.")

    option.num_entity = data.num_entity
    option.num_operator = data.num_operator
    if not option.query_is_language:
        option.num_query = data.num_query
    else:
        option.num_vocab = data.num_vocab
        option.num_word = data.num_word  # the number of words in each query

    option.this_expsdir = os.path.join(option.exps_dir, option.tag)
    option.ckpt_dir = os.path.join(option.this_expsdir, "ckpt")
    # Creates this_expsdir as well, since ckpt_dir lives inside it.
    os.makedirs(option.ckpt_dir, exist_ok=True)
    option.model_path = os.path.join(option.ckpt_dir, "model")

    option.save()
    print("Option saved.")

    # build the model
    learner = Learner(option)
    print("Model built.")

    saver = tf.train.Saver()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = False
    config.log_device_placement = False
    config.allow_soft_placement = True
    with tf.Session(config=config) as sess:
        tf.set_random_seed(option.seed)
        sess.run(tf.global_variables_initializer())
        print("Session initialized.")

        if option.from_model_ckpt is not None:
            saver.restore(sess, option.from_model_ckpt)
            print("Checkpoint restored from model %s" %
                  option.from_model_ckpt)

        data.reset(option.batch_size)
        experiment = Experiment(sess, saver, option, learner, data)
        print("Experiment created.")

        if not option.no_train:
            print("Start training...")
            experiment.train()

        if not option.no_preds:
            print("Start getting test predictions...")
            experiment.get_predictions()

        if not option.no_rules:
            print("Start getting rules...")
            experiment.get_rules()

        if option.get_vocab_embed:
            print("Start getting vocabulary embedding...")
            experiment.get_vocab_embedding()

    experiment.close_log_file()
    print("=" * 36 + "Finish" + "=" * 36)
def _to_quarter_frame(series):
    """Return ``series`` as a frame whose ``date`` column holds quarter labels."""
    frame = series.to_frame().reset_index()
    frame['date'] = frame['date'].apply(modifMonthtoSeason)
    return frame


def _subtract_prior_quarters_from_q4(frame, column):
    """Replace every Q4 value with Q4 minus the three rows before it, in place."""
    # NOTE(review): assumes at least three rows precede every 'Q04' row; with
    # fewer, the negative label lookup raises KeyError (same as before).
    reported = frame.copy()
    for row, label in enumerate(frame['date']):
        if 'Q04' not in label:
            continue
        for prev in range(row - 1, row - 4, -1):
            frame.at[row, column] = (frame.at[row, column] -
                                     reported.at[prev, column])


def getIncomeTable(strid):
    """Build a Plotly figure of quarterly revenue and pre-tax income.

    Revenue comes from '營業收入合計'. When that table has any missing value
    for ``strid`` (e.g. financial holding companies) '淨收益' is used instead,
    and '收入合計' when the first '淨收益' value is also missing. Fourth-quarter
    revenue has the three preceding rows subtracted from it. Pre-tax income
    ('繼續營業單位稅前淨利(淨損)') has the previous row subtracted on every
    row whose position is not a multiple of four.

    :param strid: column key (stock id) selected from each data table.
    :return: ``plotly.graph_objects.Figure`` with two line traces.
    """
    data = Data()
    # ------- income statement
    n_periods = len(data.dates['incomeStatement'])

    revenue = data.get('營業收入合計', n_periods)
    columns_with_nan = revenue.columns[revenue.isna().any()]
    if strid in columns_with_nan:
        # Financial holding companies and similar report net revenue instead.
        series = data.get('淨收益', n_periods)[strid]
        # .iloc makes the positional access explicit; plain [0] on a
        # label-indexed Series relies on a deprecated fallback.
        if math.isnan(series.iloc[0]):
            # Some industries call it "total income" (6005, 6024 still pending).
            series = data.get('收入合計', n_periods)[strid]
    else:
        series = revenue[strid]

    incomeTable = _to_quarter_frame(series)
    _subtract_prior_quarters_from_q4(incomeTable, strid)
    incomeTable.set_index('date', inplace=True)

    # --------- pre-tax income
    preTaxIncome = _to_quarter_frame(
        data.get('繼續營業單位稅前淨利(淨損)', n_periods)[strid])
    reported_pre_tax = preTaxIncome.copy()
    for row in range(len(preTaxIncome)):
        if row % 4 != 0:  # not the first quarter
            preTaxIncome.at[row, strid] = (
                preTaxIncome.at[row, strid] -
                reported_pre_tax.at[row - 1, strid])
    preTaxIncome.set_index('date', inplace=True)
    # --------- end pre-tax income
    # ------- end income statement

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(x=incomeTable.index,
                   y=incomeTable[strid],
                   name='營業收入',
                   hovertemplate='%{x}營業收入= %{y:$,}'))
    fig.add_trace(
        go.Scatter(x=preTaxIncome.index,
                   y=preTaxIncome[strid],
                   name='稅前淨利',
                   hovertemplate="%{x}稅前淨利= %{y:$,}"))
    fig.update_layout(plot_bgcolor='#36404A',
                      paper_bgcolor='#36404A',
                      font_color='#7FDBFF')
    return fig
def compute_accuracy(data, predictions): ground_truth = np.array([ex['label'] for ex in data]) #Array of Label Dataset predictions = np.array(predictions) assert len(ground_truth) == len(predictions) return np.sum(np.equal(ground_truth, predictions)) / float( len(ground_truth)) if __name__ == '__main__': data = { 'train': Data('data/train2017.csv').get_train_data(), 'valid_2016': Data('data/valid2016.csv').get_validtest_data(), 'valid_2018': Data('data/valid2018.csv').get_validtest_data(), 'test': Data('data/test2016.csv').get_validtest_data() } embedded_data = dict() train_context_embs, train_ending_embs = np.random.rand( 5000, 768), np.random.rand(5000, 768) embedded_data['train'] = { 'context': train_context_embs, 'ending': train_ending_embs } valid_2016_context_embs, valid_2016_ending_0_embs, valid_2016_ending_1_embs = np.random.rand( 1871, 768), np.random.rand(1871, 768), np.random.rand(1871, 768) valid_2018_context_embs, valid_2018_ending_0_embs, valid_2018_ending_1_embs = np.random.rand(
def main_worker(rank, world_size, args):
    """Run distributed training of ``ModelDSR`` for one process.

    Joins the process group, builds the model (optionally restoring weights
    from ``args.resume``), creates distributed train/test loaders, then for
    each epoch runs one training and one evaluation pass. Losses are summed
    across processes with ``all_reduce``; rank 0 logs them to TensorBoard and
    periodically visualizes and saves a checkpoint.

    :param rank: index of this process; also selects ``args.gpus[rank]``.
    :param world_size: total number of processes in the group.
    :param args: parsed options; ``args.gpu`` is set here as a side effect.
    """
    args.gpu = args.gpus[rank]
    writer = None
    if rank == 0:
        # Only the first process writes TensorBoard events.
        writer = SummaryWriter(osp.join('exp', args.exp))
    print(f'==> Rank={rank}, Use GPU: {args.gpu} for training.')
    dist.init_process_group(backend=args.dist_backend,
                            init_method=args.dist_url,
                            world_size=world_size,
                            rank=rank)

    torch.cuda.set_device(args.gpu)

    model = ModelDSR(
        object_num=args.object_num,
        transform_type=args.transform_type,
        motion_type='se3' if args.model_type != '3dflow' else 'conv',
    )
    model.cuda()

    optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.95))

    if args.resume is not None:
        checkpoint = torch.load(
            args.resume, map_location=torch.device(f'cuda:{args.gpu}'))
        model.load_state_dict(checkpoint['state_dict'])
        print(f'==> rank={rank}, loaded checkpoint {args.resume}')

    data, samplers, loaders = {}, {}, {}
    for split in ['train', 'test']:
        data[split] = Data(data_path=args.data_path,
                           split=split,
                           seq_len=args.seq_len)
        samplers[split] = torch.utils.data.distributed.DistributedSampler(
            data[split])
        loaders[split] = DataLoader(dataset=data[split],
                                    batch_size=args.batch,
                                    num_workers=args.workers,
                                    sampler=samplers[split],
                                    pin_memory=False)

    print('==> dataset loaded: [size] = {0} + {1}'.format(
        len(data['train']), len(data['test'])))

    model = torch.nn.parallel.DistributedDataParallel(model,
                                                      device_ids=[args.gpu])

    for epoch in range(args.epoch):
        # Re-seed the sampler so each epoch sees a different shuffle.
        samplers['train'].set_epoch(epoch)
        lr = adjust_learning_rate(optimizer, epoch, args)
        if rank == 0:
            print(f'==> epoch = {epoch}, lr = {lr}')

        with torch.enable_grad():
            loss_tensor_train = iterate(loaders['train'], model, optimizer,
                                        rank, args)
        with torch.no_grad():
            loss_tensor_test = iterate(loaders['test'], model, None, rank,
                                       args)

        # tensorboard log: sum the per-process losses across the group
        loss_tensor = torch.stack([loss_tensor_train, loss_tensor_test]).cuda()
        torch.distributed.all_reduce(loss_tensor)
        if rank == 0:
            loss_tensor = loss_tensor.cpu().numpy()
            for i, split in enumerate(['train', 'test']):
                for j, loss_type in enumerate(args.loss_types):
                    for step_id in range(args.seq_len):
                        writer.add_scalar(
                            '%s-loss_%s/%d' % (split, loss_type, step_id),
                            loss_tensor[i, j, step_id] / len(data[split]),
                            epoch + 1)
            writer.add_scalar('learning_rate', lr, epoch + 1)

        if rank == 0 and (epoch + 1) % args.snapshot_freq == 0:
            visualize(loaders, model, epoch, args)
            save_state = {
                'state_dict': model.module.state_dict(),
            }
            torch.save(save_state, osp.join(args.model_dir, 'latest.pth'))
            shutil.copyfile(
                osp.join(args.model_dir, 'latest.pth'),
                osp.join(args.model_dir, 'epoch_%d.pth' % (epoch + 1)))

    if writer is not None:
        # Flush pending events; the writer was previously left open.
        writer.close()
def __init__(self, transform=None):
    """Store ``transform``, create a fresh ``Data`` object and call ``init_batch``.

    :param transform: kept on ``self.transform``; not used by this method
        (presumably applied when items are fetched -- confirm in the class).
    """
    self.transform = transform
    self.data = Data()
    # Runs after self.data is assigned.
    # NOTE(review): init_batch presumably reads self.data -- confirm.
    self.init_batch()
# set arguments l_r = args.l_r batch_size = args.batch_size pickle_dir = args.pickle_dir max_seq = args.max_seq epochs = args.epochs is_reuse = args.is_reuse load_path = args.load_path save_path = args.save_path multi_gpu = args.multi_gpu num_layer = args.num_layers # load data #dataset = Data('dataset/processed') dataset = Data(pickle_dir) print(dataset) # load model learning_rate = callback.CustomSchedule(par.embedding_dim) if l_r is None else l_r opt = Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9) # define model mt = MusicTransformerDecoder( embedding_dim=256, vocab_size=par.vocab_size, num_layer=num_layer, max_seq=max_seq, dropout=0.2,
def main():
    """
    Build the AMP Health Checker window and run its event loop.

    Exports the Windows certificate store to a temporary PEM bundle for the
    requests package, configures logging, loads the settings (terminating with
    exit status 1 and a popup when they cannot be loaded), then polls the
    window roughly every five seconds, refreshing the displayed data and
    dispatching button events to the matching popup.
    """

    def action_button(label, tooltip, **extra):
        """Create a standard-size button in the default (inactive) colour."""
        return sg.Button(label,
                         button_color=default_button_color,
                         size=button_size,
                         tooltip=tooltip,
                         **extra)

    # Read the cert data
    cert_data = "\n".join(
        list(c.wincerts.get_pems()) +
        list(c.wincerts.get_pems_wincertstore())).encode()

    # Write the cert data to a temporary file. The context manager flushes and
    # closes the handle; delete=False keeps the file on disk afterwards.
    with tempfile.NamedTemporaryFile(delete=False) as handle:
        handle.write(cert_data)

    # Set the temporary file name to an environment variable for the requests package
    os.environ['REQUESTS_CA_BUNDLE'] = handle.name

    logging.basicConfig(
        format=
        '%(asctime)s %(name)-12s %(levelname)-8s %(filename)s %(funcName)s %(message)s',
        datefmt='%m-%d %H:%M:%S',
        level=logging.INFO,
        filename="amp_health_checker_log.log")
    logging.warning("AMP Health Checker logging level is %s",
                    logging.getLevelName(logging.getLogger().level))
    logging.debug("%s: Starting Health Checker", time.ctime())

    # ``raise SystemExit`` is used instead of ``exit()``: the latter is a
    # helper injected by the ``site`` module and is not guaranteed to exist
    # outside the interactive interpreter.
    try:
        settings_manager = SettingsManager()
        settings_manager.load_from_disk()
    except json.decoder.JSONDecodeError as e:
        errmsg = '%s: line %d column %d (char %d)' % (e.msg, e.lineno,
                                                      e.colno, e.pos)
        sg.Popup(
            f"Configration file is not valid JSON. Cannot proceed.\n{errmsg}",
            title="AMP not found")
        logging.critical(
            "Configration file is not valid JSON. Cannot proceed.")
        raise SystemExit(1)
    except SchemaError as e:
        sg.Popup(
            f"Configuration file contains bad Schema. Cannot Proceed.\n{e.code}",
            title="AMP not found")
        logging.critical(
            f"Configuration file contains bad Schema. Cannot Proceed. {e.code}"
        )
        raise SystemExit(1)
    except Exception as e:
        sg.Popup(f"Unknown Error. Cannot Proceed.\n{str(e)}",
                 title="AMP not found")
        logging.critical(f"Unknown Error. Cannot Proceed. {str(e)}")
        raise SystemExit(1)

    x_count = 0
    button_size = (20, 1)
    layout = [
        [
            sg.Text("AMP Version: ",
                    tooltip="The current AMP version running on the system."),
            sg.Text("Loading...", key='_version')
        ],
        [
            sg.Text(
                "CPU Usage: ",
                tooltip="The current amount of CPU utilized by AMP executables."
            ),
            sg.Text("0", key='_cpu', size=(5, 1))
        ],
        [
            sg.Text("AMP Uptime: ", size=(10, 1)),
            sg.Text("",
                    size=(27, 1),
                    key="_uptime",
                    tooltip="Time since AMP was last stopped")
        ],
        [
            sg.Text(
                "Isolation: ",
                tooltip="Shows if the connector is Isolated or Not Isolated. "
                "Refresh with Refresh button."),
            sg.Text("", size=(12, 1), key="_isolated"),
            sg.Text(
                "",
                tooltip=
                "If Isolated, shows the unlock code. Requires valid API Credentials .",
                size=(17, 1),
                key="_unlock_code")
        ],
        [sg.Text('_' * 50)],
        [
            sg.Text("TETRA Version: ", size=(11, 1)),
            sg.Text("",
                    size=(8, 1),
                    key="_tetra_version",
                    tooltip="Shows the local TETRA version.\n"
                    "Green if up to date.\n"
                    "Yellow if not within last 5 or connectivity error "
                    "to API.\nRed if TETRA is not enabled."),
            action_button(
                'Check TETRA Version',
                "Checks the API to see if TETRA is up to date. Requires Valid API Credentials.",
                key='_tetra_version_button'),
            sg.Text("", key="_latest_tetra_version", size=(8, 1))
        ],
        [
            sg.Text("Policy Serial: ", size=(11, 1)),
            sg.Text(
                "",
                size=(8, 1),
                key="_policy_version",
                tooltip="Shows the current policy serial number.\n"
                "Green if this matches the cloud version.\n"
                "Gray if there is a connectivity issue or invalid API Credentials.\n"
                "Red if the local policy doesn't match the cloud version. Try syncing policy."
            ),
            action_button(
                "Check Policy Version",
                "Checks the API to see if the policy is up to date.",
                key='_policy_version_button'),
            sg.Text("", key="_latest_policy_version", size=(8, 1))
        ],
        [
            sg.Text(
                "API Credentials: ",
                size=(13, 1),
                tooltip='Shows if the currently stored API '
                'Credentials are valid. Can read from text file named "apiCreds.txt" in the local directory.\n'
                'Must be in this format:\n'
                'client_id="abcdabcdabcdabcdabcd"\n'
                'api_key="abcd1234-abcd-1234-abcd-abcd1234abcd"'),
            sg.Text("", size=(6, 1), key="_api_cred_valid"),
            action_button("Add API Credentials",
                          "Allows user to manually input API Credentials.",
                          key="-API-CREDS-")
        ],
        [sg.Text('_' * 50)],
        [
            action_button(
                "Live Debugging",
                "Live analysis used for determining potential exclusions."),
            action_button(
                "Run Analysis",
                "Runs analysis on the sfc.exe.log file to provide information on potential exclusions."
            )
        ],
        [
            action_button(
                "Live Top Processes",
                "Shows the top processes seen on the system in a live view."),
            action_button(
                "Top IPs",
                "Shows the top IP addresses seen on the system in a live view."
            )
        ],
        [
            action_button(
                "Connectivity Test",
                "Test connection to the required servers for AMP operations.",
                key="_connectivity_test"),
            action_button(
                "Check Engines",
                "Provides a quick view of which AMP engines are enabled on the system."
            )
        ],
        [
            action_button(
                "View Exclusions",
                "Shows the file and process exclusions from the local policy."
            ),
            action_button(
                "Manual SFC Analysis",
                "Allows importing external sfc.exe.log files for analysis.")
        ],
        [
            action_button(
                "Generate Diagnostic",
                "Generate AMP diagnostic bundle with AMP Health Checker log. Both files "
                "will be on the desktop."),
            action_button(
                "Settings",
                "Add settings view/file 4.1 credentials for api 4.2 api endpoint "
                "4.3 update endpoint 4.4 endpoint list")
        ],
        [
            sg.Text(
                'Log Level: ',
                tooltip=
                "Select higher log level if requested by the tool developers."
            ),
            sg.Button('INFO', button_color=active_button_color, key='_INFO'),
            sg.Button('WARNING',
                      button_color=default_button_color,
                      key="_WARNING"),
            sg.Button('DEBUG', button_color=default_button_color, key="_DEBUG")
        ],
        [sg.Text('', size=(8, 1))],
        [
            sg.Text('', size=(13, 1)),
            sg.Button(
                "Refresh",
                size=(7, 1),
                button_color=default_button_color,
                tooltip="Refreshes calculated data, including Isolation Status."
            ),
            sg.Button("Cancel",
                      button_color=default_button_color,
                      tooltip="Exits the program.")
        ]
    ]
    logging.debug('test')
    window = sg.Window("AMP Health Check",
                       layout,
                       size=(480, 540),
                       margins=(60, 10))

    # Log-level button key -> (level, logger call used for the confirmation
    # message, whether API credentials are re-verified afterwards).
    log_level_events = {
        '_INFO': (logging.INFO, logging.info, False),
        '_WARNING': (logging.WARNING, logging.warning, True),
        '_DEBUG': (logging.DEBUG, logging.debug, True),
    }

    is_first = True
    d_instance = Data(settings_manager)
    while True:
        if is_first:
            # First pass returns immediately so the window is populated at once.
            event, values = window.Read(timeout=0)
            logging.debug('Event - %s : Values - %s', event, values)
            is_first = False
        else:
            event, values = window.Read(timeout=5000)

        # Refresh the API-backed values only on every eleventh pass.
        if x_count < 10:
            x_count += 1
        else:
            if d_instance.api_cred_valid:
                d_instance.update_api_calls(settings_manager)
            x_count = 0

        d_instance.update(settings_manager)
        logging.debug('Self Scan Count = %s', d_instance.internal_health_check)
        window.FindElement('_version').Update(d_instance.version)
        window.FindElement('_cpu').Update(d_instance.current_cpu)
        window.FindElement('_uptime').Update(d_instance.converted_uptime)
        window.FindElement('_tetra_version').Update(
            d_instance.tetra_version_display)
        window.FindElement('_policy_version').Update(
            d_instance.policy_dict['policy_sn'])
        window.FindElement('_api_cred_valid').Update(
            'Valid' if d_instance.api_cred_valid else 'Invalid')
        window.FindElement('_isolated').Update(d_instance.isolated)
        window.FindElement('_unlock_code').Update(d_instance.unlock_code)

        if event in (None, "Cancel"):
            break
        elif event in log_level_events:
            level, log_call, reverify = log_level_events[event]
            logging.getLogger().setLevel(level)
            log_call('Log level changed to %s',
                     logging.getLevelName(logging.getLogger().level))
            # Highlight the selected level button, reset the other two.
            for key in ('_INFO', '_WARNING', '_DEBUG'):
                window.FindElement(key).Update(
                    button_color=active_button_color
                    if key == event else default_button_color)
            if reverify:
                d_instance.verify_api_creds(settings_manager)
            window.Refresh()
        elif event == "Live Debugging":
            popups.lpap(d_instance, settings_manager)
        elif event == "Live Top Processes":
            popups.just_process(d_instance, settings_manager)
        elif event == "_tetra_version_button":
            popups.check_latest_tetra(d_instance, window, settings_manager)
        elif event == "_policy_version_button":
            popups.check_latest_policy(d_instance, window, settings_manager)
        elif event == "_connectivity_test":
            popups.connectivity(d_instance, settings_manager)
        elif event == "Check Engines":
            popups.engines_enabled(d_instance)
        elif event == "View Exclusions":
            popups.view_exclusions(d_instance)
        elif event == "Run Analysis":
            popups.analysis(d_instance, settings_manager)
        elif event == "Top IPs":
            popups.topips(d_instance, settings_manager)
        elif event == "Refresh":
            d_instance.reset_data(settings_manager)
            window.Refresh()
        elif event == "-API-CREDS-":
            popups.get_api_credentials(d_instance, settings_manager)
        elif event == "Manual SFC Analysis":
            popups.manual_sfc(d_instance, settings_manager)
        elif event == "Generate Diagnostic":
            d_instance.generate_diagnostic()
            if any(d_instance.diag_failed):
                popups.diag_failed_popup(d_instance.diag_failed)
        elif event == "Settings":
            popups.settings(settings_manager)

    # Leave the connector in its normal state when the tool exits.
    if d_instance.enabled_debug:
        d_instance.disable_debug()
    # update
    window.close()
def setVal(self, i, val):
    """Wrap ``val`` in a new ``Data`` object and store it at ``self.data[i]``.

    :param i: index or key into ``self.data``.
    :param val: value passed to the ``Data`` constructor.
    """
    self.data[i] = Data(val)
class GenMember(object):
    """
    Class that is used to create valid mathematical expressions, get the fitness
    of each of the individuals in the population, select two parents, and
    also to update the population once the children are ready to be added into
    the new population.

    The data set is read once, when the class body is executed, and shared by
    all instances through the ``data`` and ``labels`` class attributes.
    """
    # Read the data from the text file
    d = Data('dataset2.txt')
    read_data = d.read_data(shuffle_d=False)
    data = read_data[0]
    labels = read_data[1]
    # the set of functional values. - consider expanding this.
    operations = ['+', '-', '*', '/']

    def generate_expression(self, max_depth=4):
        """
        Function to generate a valid mathematical expression. An expression
        consists of values from the functional set -> ['+', '-', '*', '/'] and
        values from a terminal set -> [random number between 0-50, X1,...,X5].
        :param max_depth: maximum depth of the expression tree; values below
            1 are treated as 1 so the recursion always terminates.
        :return: the expression as a string.
        """
        # emit either a random number between 0 and 50, or a variable X1-X5.
        if max_depth <= 1:
            terminals = [random() * 50, "X1", "X2", 'X3', "X4", "X5"]
            return str(choice(terminals))

        # include bracketing 20% of the time.
        bracketed = random() <= 0.2
        # Built left to right so the order of random draws is unchanged.
        expression = (self.generate_expression(max_depth - 1) +
                      choice(self.operations) +
                      self.generate_expression(max_depth - 1))
        if bracketed:
            return '(' + expression + ')'
        return expression

    def __str__(self, num=None):
        """
        cast terminal value to a string.
        :param num: the value to be parsed as a string. When omitted (as
            ``str(instance)`` and ``print(instance)`` do) the default object
            description is returned instead of raising ``TypeError``.
        :return: value parsed as a string
        """
        if num is None:
            return object.__str__(self)
        return str(num)

    def get_valid_expressions(self, max_depth, population_size):
        """
        function to ensure that each initial member of the population contains
        all of the variables X1,..,X5.
        :param max_depth: maximum depth of the tree.
        :param population_size: number of candidate expressions to generate.
        :return: the generated expressions that mention every variable, as a
            list of strings. The list may be shorter than ``population_size``
            because candidates missing a variable are dropped.
        """
        required = ('X1', 'X2', 'X3', 'X4', 'X5')
        candidates = [
            str(self.generate_expression(max_depth))
            for _ in range(population_size)
        ]
        # ``'X1' and ... and 'X5' in item`` only ever tested for 'X5'; test
        # each variable explicitly.
        return [
            item for item in candidates
            if all(variable in item for variable in required)
        ]

    def get_fitness(self, expressions, child=False):
        """
        Function to get the fitness of the population. Fitness function based on
        Number of Hits method: the fitness of an expression is the number of
        rows whose prediction differs from the label (lower is better).
        :param expressions: list of expression strings, or a single expression
            string when ``child`` is True.
        :param child: if False, ``expressions`` is the whole population. If True,
            ``expressions`` is one child expression and is wrapped in a list.
        :return: list with one fitness value per expression.
        """
        expression = [expressions] if child is True else expressions

        # one row per company; transpose so that each Xn holds one column.
        columns = np.asarray(GenMember.data, dtype=object).T
        labels = GenMember.labels

        # These names are read by eval() below; do not rename or remove them.
        X1 = columns[0]  # length = len of data set
        X2 = columns[1]
        X3 = columns[2]
        X4 = columns[3]
        X5 = columns[4]
        n_rows = len(X1)

        predictions = list()
        for ex in expression:
            try:
                # SECURITY: eval() executes arbitrary code. Only pass
                # expressions produced by this class, never external input.
                x = eval(ex)
            except ZeroDivisionError:
                # expression contains "/0": give the individual a poor fitness.
                predictions.append([9999] * n_rows)
                continue

            if isinstance(x, (float, int)):
                # expression contains no variable: same value for every row.
                predictions.append([x] * n_rows)
            else:
                # non-negative result -> class 1, negative -> class 0
                predictions.append([1 if value >= 0 else 0 for value in x])

        # get number of hits fitness.
        fitness = list()
        for predicted in predictions:
            hits = [
                labels[j] == predicted[j] for j in range(len(predicted))
            ]
            fitness.append(len(hits) - sum(hits))
        return fitness

    def tournament_selection(self, population, fitness, selection_size):
        """
        Function to select the parents of the population using tournament selection.
        Draw ``selection_size`` individuals at random and keep the fittest as
        the first parent, remove it from the pool, then repeat with a fresh
        draw for the second parent.
        :param population: the population generated - the list of expressions
        :param fitness: the population fitnesses
        :param selection_size: the number of individuals to compete against each other
        :return: list of two ``(expression, fitness)`` tuples.
        """
        pool = list(zip(population, fitness))

        # first tournament: lowest fitness wins.
        parent_one = min(sample(pool, selection_size), key=lambda t: t[1])
        # remove parent for now to prevent parent being selected twice
        pool.remove(parent_one)

        # second tournament on the remaining individuals.
        parent_two = min(sample(pool, selection_size), key=lambda t: t[1])

        return [parent_one, parent_two]

    def select_best_parents(self, population, fitness):
        """
        Function to select the best two individuals of the current population
        to be put forward for crossover and mutation.
        :param population: the current population
        :param fitness: fitness of the current population
        :return: list of two ``(expression, fitness)`` tuples.
        """
        pool = list(zip(population, fitness))
        parent_one = min(pool, key=lambda t: t[1])
        pool.remove(parent_one)
        parent_two = min(pool, key=lambda t: t[1])
        return [parent_one, parent_two]

    def update_population(self, population, fitness, c1, child_fit1, c2,
                          child_fit2):
        """
        Function to update the population, by comparing the worst individuals in
        the current population with the two new children produced. A child
        replaces the current worst individual when its fitness is at least as
        good (less than or equal).
        :param population: the current population
        :param fitness: fitness of each individual in the current population
        :param c1: first child produced
        :param child_fit1: fitness of the first child; element 0 is used
        :param c2: second child produced
        :param child_fit2: fitness of the second child; element 0 is used
        :return: the new updated population with the new population fitnesses.
        """
        pool = list(zip(population, fitness))

        # The worst individual is looked up again for the second child, so the
        # second child may displace the first one.
        for child, child_fitness in ((c1, child_fit1[0]), (c2, child_fit2[0])):
            worst = max(pool, key=lambda t: t[1])
            if child_fitness <= worst[1]:
                pool.remove(worst)
                pool.append((child, child_fitness))

        new_population = [individual[0] for individual in pool]
        new_population_fitness = [individual[1] for individual in pool]
        return new_population, new_population_fitness
def parse_parser_results(text):
    """ This is the nasty bit of code to interact with the command-line
    interface of the CoreNLP tools.

    Walks the parser output line by line (split on "\\r\\n") with a small
    state machine and feeds what it finds into a ``Data`` object:

    * a line starting with "Sentence #" switches to STATE_TEXT;
    * STATE_TEXT: the next line is stored via ``addText``;
    * STATE_WORDS: "[Text=..." lines are split with WORD_PATTERN and each
      token is stored via ``addToken``;
    * STATE_TREE: lines are collected until a blank line (the tree itself is
      currently not stored);
    * STATE_DEPENDENCY: lines are skipped until a blank line;
    * STATE_COREFERENCE: "Coreference set" lines open a new set, other lines
      are matched with CR_PATTERN and appended to the current set.

    Parsing stops at a line equal to 'NLP>'.

    :param text: the raw parser output as a single string
    :return: the populated ``Data`` instance
    :raises Exception: if a non-empty line in STATE_WORDS does not start
                       with "[Text="
    """
    data = Data()
    state = STATE_START
    #for line in re.split("\r\n(?![^\[]*\])",text):
    for line in re.split("\r\n", text):
        line = line.strip()
        # The interactive prompt marks the end of the parser output.
        if line == 'NLP>':
            break
        if line.startswith("Sentence #"):
            state = STATE_TEXT
        elif state == STATE_TEXT:
            # NOTE(review): newSen() is called on the class, not on `data`;
            # confirm it is a class/static method and not a typo for data.newSen().
            Data.newSen()
            data.addText(line)
            state = STATE_WORDS
        elif state == STATE_WORDS:
            # Blank lines before the token list are ignored.
            if len(line) == 0:
                continue
            if not line.startswith("[Text="):
                raise Exception('Parse error. Could not find "[Text=" in: %s' % line)
            for s in WORD_PATTERN.findall(line):
                t = parse_bracketed(s)
                # Skip tokens whose text came back empty.
                if t[0] == '':
                    continue
                data.addToken(t[0], t[1][u'CharacterOffsetBegin'], t[1][u'CharacterOffsetEnd'], t[1][u'Lemma'], t[1][u'PartOfSpeech'], t[1][u'NamedEntityTag'])
            state = STATE_TREE
            parsed = []
        elif state == STATE_TREE:
            if len(line) == 0:
                state = STATE_DEPENDENCY
                # The joined tree text is computed but not used while the
                # addTree call below stays commented out.
                parsed = " ".join(parsed)
                #data.addTree(Tree.parse(parsed))
            else:
                parsed.append(line)
        elif state == STATE_DEPENDENCY:
            if len(line) == 0:
                state = STATE_COREFERENCE
            else:
                pass
                # Dependency parsing is disabled; the old code is kept below
                # as an inert string literal.
                ''' # don't need here split_entry = re.split("\(|, ", line[:-1]) if len(split_entry) == 3: rel, l_lemma, r_lemma = split_entry m = re.match(r'(?P<lemma>.+)-(?P<index>[^-]+)', l_lemma) l_lemma, l_index = m.group('lemma'), m.group('index') m = re.match(r'(?P<lemma>.+)-(?P<index>[^-]+)', r_lemma) r_lemma, r_index = m.group('lemma'), m.group('index') data.addDependency( rel, l_lemma, r_lemma, l_index, r_index) '''
        elif state == STATE_COREFERENCE:
            if "Coreference set" in line:
                #if 'coref' not in results:
                # results['coref'] = []
                # The same list object is registered with `data` and then
                # filled in place by the following lines.
                coref_set = []
                data.addCoref(coref_set)
            else:
                # NOTE(review): coref_set is only bound after a
                # "Coreference set" line; a match before that header would
                # raise UnboundLocalError. Presumably the header always comes
                # first; confirm.
                for src_i, src_pos, src_l, src_r, sink_i, sink_pos, sink_l, sink_r, src_word, sink_word in CR_PATTERN.findall(
                        line):
                    src_i, src_pos, src_l, src_r = int(src_i), int(
                        src_pos), int(src_l), int(src_r)
                    sink_i, sink_pos, sink_l, sink_r = int(sink_i), int(
                        sink_pos), int(sink_l), int(sink_r)
                    coref_set.append(
                        ((src_word, src_i, src_pos, src_l, src_r), (sink_word, sink_i, sink_pos, sink_l, sink_r)))
    return data
from data import Data

# Build a Data object with the positional argument "unsupervised" and
# preprocessing switched off, then export it through save_csv_separate.
test = Data("unsupervised", preprocess=False)
# NOTE(review): "test\\" is a literal backslash-terminated string, i.e. a
# Windows-style directory prefix; presumably the two arguments are an output
# path prefix and a file-name suffix. Confirm against Data.save_csv_separate.
test.save_csv_separate("test\\", "_test")
for i in range(10): img_path = gallery_path[index[i]] print(img_path) ax = plt.subplot(1, 11, i + 2) ax.axis('off') plt.imshow(plt.imread(img_path)) ax.set_title(img_path.split('/')[-1][:9]) fig.savefig("show.png") print('result saved to show.png')


if __name__ == '__main__':
    # Script entry point: build the data, model and loss objects and hand
    # them to Main, which drives training and evaluation.
    data = Data()
    model = MGN()
    loss = Loss()
    main = Main(model, loss, data)

    if opt.mode == 'train':
        # Epochs are numbered from 1 through opt.epoch inclusive.
        for epoch in range(1, opt.epoch + 1):
            print('\nepoch', epoch)
            main.train()
            # NOTE(review): nesting reconstructed from whitespace-flattened
            # source. This assumes evaluation and the checkpoint save both
            # happen only on every 50th epoch; confirm.
            if epoch % 50 == 0:
                print('\nstart evaluate')
                main.evaluate(epoch)
                # Create the output directory on first use, then write the
                # model weights to a per-epoch file.
                os.makedirs('weights', exist_ok=True)
                torch.save(model.state_dict(), ('weights/model_{}.pt'.format(epoch)))
# Quick'n'dirty SSL certificate generation: # # openssl genrsa -out webhook_pkey.pem 2048 # openssl req -new -x509 -days 3650 -key webhook_pkey.pem -out webhook_cert.pem # # When asked for "Common Name (e.g. server FQDN or YOUR name)" you should reply # with the same value in you put in WEBHOOK_HOST WEBHOOK_URL_BASE = "https://%s:%s" % (WEBHOOK_HOST, WEBHOOK_PORT) WEBHOOK_URL_PATH = "/%s/" % (API_TOKEN) logger.setLevel(logging.INFO) bot = TeleBot(API_TOKEN) data = Data(bot=bot) system = System(data=data) client = Client(data=data) main_menu = Main(data=data) order = Order(data=data, client=client) channel = Channel(data=data, client=client) redaction = Redaction(data=data, order=order) account = Account(data=data) app = flask.Flask(__name__) # Empty webserver index, return nothing, just http 200 @app.route('/', methods=['GET', 'HEAD']) def index():