def project_part_2_prepare():
    labelled_data = read_file('CN/train', type="with_label")
    pr(labelled_data[:3])
    word_set = set(it[0] for sequence in labelled_data for it in sequence)
    label_set = set(it[1] for sequence in labelled_data for it in sequence)
    emission_parameter, c1, c2 = emission_parameter_calcul_on_train_set(
        labelled_data, label_set, word_set)
    pr(emission_parameter["O"]["高兴"])
    from collections import Counter
    words = [it[0] for sequence in labelled_data for it in sequence]
    word_count = Counter(words)
    print("count of 高兴 is: " + str(word_count["高兴"]))
    print("e(O->高兴) is:")
    print(emission_parameter["O"]["高兴"])
    emission_parameter["O"]["撒达到"] = emission_parameter_calcul(
        "O", "撒达到", count_set=(c1, c2, word_set), labelled_data=labelled_data)
    print("撒达到 is a new word that never occurs in the train set; "
          "its occurrence count is: " + str(word_count["撒达到"]))
    print("e(O->撒达到) is: ")
    print(emission_parameter["O"]["撒达到"])
    print("with the special-case handling, both are treated as occurring once, "
          "and the latter is slightly smaller (which is also consistent with the algorithm)")
    return
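# The smoothed estimate that the prints above allude to is not shown here
# (emission_parameter_calcul is defined elsewhere), so the following is a
# minimal sketch assuming an add-one style rule: a word never seen in training
# is treated as if it occurred once, so it still gets a small nonzero mass.
from collections import Counter


def smoothed_emission(tag, word, labelled_data, word_set):
    # count (word, tag) pairs and per-tag totals over the training sequences
    pair_count = Counter((it[0], it[1]) for seq in labelled_data for it in seq)
    tag_count = Counter(it[1] for seq in labelled_data for it in seq)
    if word in word_set:
        return pair_count[(word, tag)] / (tag_count[tag] + 1)
    # unseen word: pretend it occurred exactly once under this tag
    return 1 / (tag_count[tag] + 1)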
def process_example():
    records = json.load(open("./data/example.json"))
    d = {}
    d['queryID'] = 1
    d["data"] = records
    pr(d)
    result = process(d)
    pr(result)
    return
def primeNear(max):
    # nums = []
    pr("cur: ")
    lastNum = 0
    for cur in range(2, max + 1):
        if checkPrime(cur):
            lastNum = cur
            # pr("{}".format(cur), end=("\n" if cur % 20 == 0 else " "))
    pr("")
    return lastNum  # nums[-1]
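# checkPrime is not defined in this snippet; a minimal sketch of the
# trial-division test it presumably performs (name and body are assumptions):
def checkPrime(n):
    if n < 2:
        return False
    i = 2
    while i * i <= n:  # divisors above sqrt(n) cannot appear alone
        if n % i == 0:
            return False
        i += 1
    return True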
def train_w2v(sentences):
    word_set = set([it for re in sentences for it in re])
    word_map = {word: str(c) for c, word in enumerate(word_set)}
    sentences = [[word_map[word] for word in sentence] for sentence in sentences]
    pr(word_map)
    # pr(sentences)
    voca_size = len(word_map)
    pr(voca_size)
    model = gensim.models.Word2Vec(sentences, min_count=1, size=VECTOR_DIMENSION)
    # model.save('/tmp/mymodel')
    # new_model = gensim.models.Word2Vec.load('/tmp/mymodel')
    # model = model.wv

    # mapping from word index to vector
    index_to_vector = dict()
    f_write = open("./model/vectors_txt_format.txt", "w")
    for i in range(voca_size):
        if str(i) in model:
            f_write.write(str(i) + ":" +
                          " ".join([str(temp) for temp in model[str(i)]]) + "\n")
            index_to_vector[str(i)] = [str(temp) for temp in model[str(i)]]
        else:
            print(str(i) + " is not in vocabulary")
    f_write.close()
    print(len(index_to_vector))
    # save two files:
    # 1. the word-to-index mapping dictionary
    # 2. the vector corresponding to each index
    with open("model/word_map.dict", "wb") as f:
        pickle.dump(word_map, f)
    with open("model/vectors.dict", "wb") as f:
        pickle.dump(index_to_vector, f)
    # just in case, save the whole model as well
    with open("./model/genism_model_vector.test.db", "wb") as f:
        model = gensim.models.Word2Vec(sentences, size=100, window=5, workers=4)
        pickle.dump(model, f)
    return
def multarg(*args, **args2):
    for v in args:
        pr(v, end=', ')
    pr("\n---")
    keys = sorted(args2.keys())
    for k in keys:
        pr("{} : {}".format(k, args2[k]))
    pr("---")
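# A quick usage sketch of multarg, assuming pr forwards to print: positional
# arguments land in args, keyword arguments in args2 (printed in key order).
multarg(1, 2, 3, name='Vasya', age=25)
# expected output:
# 1, 2, 3,
# ---
# age : 25
# name : Vasya
# ---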
def main():
    # return app.send_static_file('base.html')
    # for it in request.files:
    #     f = request.files[it]
    #     f.save('./' + "temp_working.json")
    records = request.json
    print("got a sequence of data")
    pr(records)
    # records = json.load(open("./temp_working.json"))
    result = process(records)
    # try:
    pr(result)
    header = {"content-type": "application/json"}
    r = requests.post("http://115.159.91.188:3000/statue",
                      json=result,
                      headers=header)
    # except:
    #     print("sending data back error")
    return json.dumps(result)
def fill_placeholders(intents, plugs):
    new = []
    start_id = 1
    for plug in plugs:
        for intent in intents:
            text = intent['text']
            tags = get_tags(text.split())
            # text = re.sub(r'{(.+?):}', r'{\1:' + plug + '}', text)
            text = re.sub(r'{(.+?):}', plug, text)
            obj_model = {
                'id': start_id,
                'text': text,
                'tags': tags,
                'intent': intent['intent']
            }
            # pr(obj_model)
            new.append(obj_model)
            start_id += 1
            if start_id % 1000 == 0:
                pr(start_id)
    return new
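# A small illustration of the substitution above on a toy string (not project
# data): every `{slot:}` placeholder in the text is replaced with the plug.
import re

text = 'book a flight to {city:}'
print(re.sub(r'{(.+?):}', 'Paris', text))  # -> book a flight to Paris
# the commented-out variant would fill the slot in place instead,
# producing 'book a flight to {city:Paris}'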
            'state_context_input': X_s,
            'observation_context_input': X_o,
            'current_input': X_current
        },
        batch_size=32,
        verbose=2)
    return predictions


if __name__ == "__main__":
    word_map, vectors = load_meta_model()
    index_to_word = {item: key for key, item in word_map.items()}
    records = load_labeled_data("./data/tmp.json")
    model = load_model("./model/dis_enc_model.model")
    for i in range(10):
        record = records[i]
        sentence = record["content"]
        print("raw sentence is")
        print("".join(sentence))
        real_keys = record["key"]
        print("real keywords are")
        print(real_keys)
        words = sentence
        pro = conditiaonal_probability(words, sentence, [], model)
        pr(pro)
    quit()
print(joinList)

# %%
# Unpacking dictionaries
dictioOfPoint = dict(c=10, f=20)
dictioOfPoint2 = dict(c=2, t=50)
joinDictio = {**dictioOfPoint, **dictioOfPoint2, "zz": 120}
print(joinDictio)

# %%
# Task: find the char with the biggest occurrence
testString = "This is a common interview question"
listOfChar = [*testString]
listOfWords = testString.split(" ")
print(listOfWords)

# %%
discWithNumberOfChar = {}
for char in listOfChar:
    if char in discWithNumberOfChar:
        discWithNumberOfChar[char] += 1
    else:
        discWithNumberOfChar[char] = 1
maxInDisc = max(discWithNumberOfChar, key=discWithNumberOfChar.get)
pr(discWithNumberOfChar, width=1)
print("Max is:", maxInDisc, "with number of occurrences:",
      discWithNumberOfChar[maxInDisc])
# [discWithNumberOfChar[item] += 1 for item in listOfChar]  # augmented
# assignment is not valid inside a comprehension
# print(discWithNumberOfChar)
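# %%
# The same count in one step with collections.Counter, which the commented-out
# comprehension above seems to be reaching for:
from collections import Counter

charCounter = Counter(testString)
mostCommonChar, count = charCounter.most_common(1)[0]
print("Max is:", mostCommonChar, "with number of occurrences:", count)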
        if verbose:
            print "Host %s directory %s download finished:" % (self.conn.host, rdir)
            print "%d directories, %d(%d failed) files, %d unknown type." % (
                numDir, numFile, numDownErr, numUnknown)
        return numDir, numFile, numUnknown, numDownErr


if __name__ == "__main__":
    import sys
    import traceback
    from pprint import pprint as pr

    flog = open("err.log", "wb")

    def run(host):
        try:
            fd = FtpDownloader(host=host,
                               user="******",
                               passwd="test",
                               port=21,
                               timeout=10)
            numDir, numFile, numUnknown, numDownErr = fd.downloadDir(
                rdir=".",
                ldir="download",
                tree=None,
                errHandleFunc=None,
                verbose=True)
            flog.write(
                "%s\nok\n"
                "%d directories, %d(%d failed) files, %d unknown type\n\n\n" %
                (host, numDir, numFile, numDownErr, numUnknown))
        except Exception as err:
            traceback.print_exc()
            flog.write("%s\nerror\n%s\n\n\n" % (host, traceback.format_exc()))

    pr(run(sys.argv[1]))
    flog.close()
    def getBatchable(self):
        sql = 'SELECT * FROM poc WHERE batchable=1'
        self.cursor.execute(sql)
        return self.cursor.fetchall()

    def countAll(self):
        sql = 'SELECT count(*) FROM poc'
        self.cursor.execute(sql)
        return self.cursor.fetchone()


if __name__ == '__main__':
    # testing code
    sys.path.append('../')
    from SETTINGS import FRAMEWORK_DIR
    sys.path.append(FRAMEWORK_DIR)
    from pprint import pprint as pr

    db = Database(dbFilePath='../hive.db', pocDir='../pocs/')
    #print db.updtDbFromBB2Db(bb2DbFile='../pocdb.json')
    #print db.updtDbFromPocs(pocDir='../pocs')
    #pr(db.searchStr(item='discuz'))
    #pr(db.countAll())
    #pr(db.searchPoc(pocId='poc-2014-0019'))
    #pr(db.getBatchable())
    pr(db.updtDbFromJson(jsonFile='../pocdb.json'))
    epochs=5,
    batch_size=32,
    verbose=2)
model.save('model/lstm_enc_model.model', overwrite=True)
print("Saved model to disk.")
# with open("model/lstm_enc_model.model", "wb") as f:
#     pickle.dump(model, f, protocol=2)

score = model.evaluate(
    {
        'state_context_input': X_s_test,
        'observation_context_input': X_o_test
    },
    {'predictions': Y_test},
    batch_size=128)
pr(score)
print()
quit()

print(np.min(X_train), np.max(X_train))
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
shape = X_train.shape[1:]
print(shape)

dropout_rate = 0.25
opt = Adam(lr=1e-4)
dopt = Adam(lr=1e-3)
ntrain = 10000
from event import Event
from pprint import pprint as pr
from selenium import webdriver

# def sele_phantom():
#     url = "https://tanzu.vmware.com/content/webinars/jun-30-making-k8s-great-improving-the-kubernetes-developer-experience?utm_campaign=Global_BT_Q221_Improving-K8s-Developer-Experience&utm_source=twitter&utm_medium=social"
#     browser = webdriver.PhantomJS()
#     browser.get(url)
#
#     iframe = browser.find_element_by_tag_name("iframe")
#     print(iframe)
#     browser.switch_to.default_content()
#     browser.switch_to.frame(iframe)
#
#     iframe_source = browser.page_source
#
#     print(iframe_source)
#
#     print(browser.current_url)

if __name__ == "__main__":
    keywords = "kubernetes"
    event = Event(keywords)
    # start the crawler
    data = event.start()
    pr(data)
    # sele_phantom()
    X_o = np.array([np.vstack(np.asarray(it)) for it in X[:, 1]])
    X_s = np.array([np.concatenate(np.array(it)) for it in X[:, 0]])
    return X_o, X_s


if __name__ == "__main__":
    word_map, vectors = load_meta_model()
    index_to_word = {item: key for key, item in word_map.items()}
    records = load_labeled_data("./data/tmp.json")
    records = preprocessing.load_labeled_data("./data/tmp.json")
    temp = [re["key"] for re in records]
    pr(temp[:10])
    class_number = find_class_number()
    X, Y = construct_train_data(records)
    total_number = len(X)
    Y = keras.utils.to_categorical(Y, num_classes=class_number + 1)
    X_train = X[:10, :]
    Y_train = Y[:10]
    X_test = X[int(total_number * 0.9):, :]
    Y_test = Y[int(total_number * 0.9):]
    # observation context comes from column 1, state context from column 0
    # (mirroring the helper above)
    X_o = np.array([np.vstack(np.array(it)) for it in X_train[:, 1]])
    print(X_o.shape)
    X_s = np.array([np.concatenate(np.array(it)) for it in X_train[:, 0]])
    print(X_s.shape)
try:
    inputValue = int(input("Age:"))
    xFactor = 10 / inputValue
except Exception as ex:
    print("Age has to be a number")
    print(ex)
    print(type(ex))
# print("Age is", inputValue)

# %%
from pprint import pprint as pr

try:
    file = open("App.py")
    fileContent = file.readlines()
    pr(fileContent, width=80)
    ageVal = 0
    factor = 10 / ageVal
except Exception as ex:
    print(ex)
    print(type(ex))
finally:
    file.close()

# %%
# Example of using the block alternative in Python: the with clause.
# IDisposable-style: the object has two magic methods, __enter__ and __exit__.
try:
    with open("App.py") as file:
        for line in file.readlines():
            if line.strip():
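# %%
# A minimal sketch of such a disposable object (the class and names here are
# illustrative, not part of the snippets above): any class with __enter__ and
# __exit__ works in a with block, and __exit__ runs even if the body raises.
class ManagedFile:
    def __init__(self, path):
        self.path = path

    def __enter__(self):
        self.file = open(self.path)
        return self.file

    def __exit__(self, exc_type, exc_value, tb):
        self.file.close()  # always runs, like a finally clause
        return False  # do not suppress exceptions


# with ManagedFile("App.py") as f:
#     print(f.readline())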
import math
from pprint import pr  # local pprint helper module, not the stdlib pprint

pr.print_on = False

pr("hello!")
pr("digits: {}, {}".format(11, 22))

s = '''hello
2
'''
pr(s)

for i in range(10, 20, 3):
    if i % 2 == 0:
        pr("even: {}".format(i))
    else:
        pr("odd: {}".format(i))

words = ['aaa', 'bbb', 'ccc', 'ddd']
for w in words:
    pr(w)
for i in range(len(words)):
    pr("{} : {}".format(i, words[i]))

for x in list(range(10)):
    if x % 2 == 0:
        pr("- {}".format(x))
    else:
        pass
def read_and_print(file="yamlread2.yaml") -> dict:
    with open(file, 'r') as fp:
        d = yaml.load(fp, Loader=yaml.FullLoader)
    pr(d)
    print("\n\n\n")
    return d
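# Hypothetical usage: given a yamlread2.yaml containing, say,
#   name: demo
#   items:
#     - 1
#     - 2
# the call pretty-prints and returns {'name': 'demo', 'items': [1, 2]}.
config = read_and_print("yamlread2.yaml")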
from pprint import pr  # local pprint helper module, not the stdlib pprint

pr.print_on = True
# printOn = True
# def pr(*args, **kwargs):
#     if printOn:
#         print(*args, **kwargs)

pr('1: %d, 2: %d, 3: %s' % (111, 222, 'ccc'))
pr('name: %(name)s, age: %(age)d' % {'name': 'Vasya', 'age': 25})

name = 'Osya'
pr(f'var name = {name}')
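# The local pprint module itself is not shown in these snippets; a minimal
# sketch of what it presumably looks like, based on the commented-out draft
# above (storing the switch as an attribute on the function is an assumption):
def pr(*args, **kwargs):
    # print only while the module-level switch is on
    if pr.print_on:
        print(*args, **kwargs)


pr.print_on = True  # default; callers flip this to silence output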
    valid_record = detect_and_delete(content)
    com_keys = key_word_extract(".".join(
        ["".join(rec["content"]) for rec in valid_record]))
    keys = dict()
    for i in range(10):
        try:
            keys["KeyCom" + str(i + 1)] = com_keys[i]
        except IndexError:
            keys["KeyCom" + str(i + 1)] = ""
    return len(valid_record), keys, valid_record


if __name__ == "__main__":
    # raw = load_raw_data("./data/jd_comment_items.json")
    # print("now is with label")
    # print(len(raw))
    # raw = make_fake_labels(raw)
    # raw = detect_and_delete(raw)
    # # save_as_file(raw)
    # pr(raw[0:5])
    # print(len(raw))
    records = load_labeled_data("./data/tmp.json")
    pr(records[0:5])
    sentences = [record["content"] for record in records]
    train_w2v(sentences)
    # label_data = load_labeled_data("./data/example.json")
if resp:
    print('Response is correct')
else:
    print('bad response')

print(resp.headers)  # print all headers
print(resp.text)  # print the whole page
with open('resp.html', 'w') as file:
    file.write(resp.text)

r = req.get(
    'https://i2.wp.com/itc.ua/wp-content/uploads/2018/09/3-1.jpg?fit=830%2C460&quality=100&strip=all&ssl=1'
)
with open('logo.png', 'wb') as f:
    f.write(r.content)

payload = {'username': '******', 'password': '******'}
auth = req.post('http://httpbin.org/post', data=payload)
auth_dict = auth.json()
# pr(auth_dict)
pr(auth_dict['form'])

basic_auth = req.get('http://httpbin.org/basic-auth/test/pass',
                     auth=('test', 'pass'))
print(basic_auth.text)

timeout_req = req.get('https://httpbin.org/delay/2', timeout=2)
pr(timeout_req)
# python3 requests_demo.py > page.html
        self.size = buckets

    def insert(self, node):
        key = hash_fn(node.data, self.size)
        if key not in self.buckets:
            self.buckets[key] = [node.data]
        else:
            self.buckets[key].append(node.data)

    def remove(self, node):
        key = hash_fn(node.data, self.size)
        if key in self.buckets:
            if node.data in self.buckets[key]:
                self.buckets[key].remove(node.data)
            else:
                raise ValueError("No such data!")
        else:
            raise KeyError("No such key!")


if __name__ == '__main__':
    ht1 = HashTable()
    datas = [
        ''.join([choice('qazwsxcderfvbgtyhnmjuiklop') for _ in range(10)])
        for _ in range(20)
    ]
    for data in datas:
        ht1.insert(Node(data))
    pr(ht1.buckets)
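# hash_fn and Node are not shown in this snippet; a plausible minimal sketch
# of both (names match the usage above, the bodies are assumptions):
class Node:
    def __init__(self, data):
        self.data = data


def hash_fn(data, size):
    # bucket index: hash of the data modulo the table size
    return hash(data) % size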
                self.__result[target_name].append(data)
            except KeyError:
                self.__result[target_name] = []
                self.__result[target_name].append(data)
        else:
            self.__result[target_name] = data
        self.__getdata = False

    # Method used to get tag detail information, in a contained way.
    def get_tag_details(self, tag):
        try:
            return self.__parsing_pattern[tag]
        except KeyError:
            return None


# Example data
parsing_pattern = {
    'p': {
        'target_name': 'test',
        'attributes': [('class', 'test')],
        'multiple_return_values': True,
        'type': 'data',
        'subtags': None,
    },
}

test_parser = sax_style_HTML_Parser(parsing_pattern)
result = test_parser.parse(
    "<!DOCTYPE html><html><a hmm='kage'>hej</a><p class='test'>test</p>"
    "<b>hmm</b><p class='test'>Test 200</p></html>")
pr(result)
import traceback
from pprint import pprint as pr

flog = open('err.log', 'wb')

def run(host):
    for x in range(1, 15):
        try:
            fd = FtpDownloader(host='figment.csee.usf.edu',
                               user='******',
                               passwd='',
                               port=21,
                               timeout=10)
            numDir, numFile, numUnknown, numDownErr = fd.downloadDir(
                rdir='/pub/DDSM/cases/benigns/benign_%02d/' % (x),
                ldir='./download/',
                tree=None,
                errHandleFunc=None,
                verbose=True)
            flog.write(
                '%s\nok\n'
                '%d directories, %d(%d failed) files, %d unknown type\n\n\n' %
                (host, numDir, numFile, numDownErr, numUnknown))
        except Exception as err:
            traceback.print_exc()
            flog.write('%s\nerror\n%s\n\n\n' % (host, traceback.format_exc()))

pr(run(sys.argv[0]))
flog.close()
        [
            struct.pack(">I", 1),  # msgid
            "\x00" * 4,  # msgcall
            "\x00\x00\x00\x02",  # rpc version
            "\x55" * 4,  # wdb program number
            "\x00\x00\x00\x01",  # program version
            struct.pack(">I", 123),  # function number: WDB_TGT_INFO_GET = 123
            "\x00" * 16,
            "\x00" * 4,
            "\x00\x00\x00\x44",  # packet length
            struct.pack(">I", 1),  # msg seq
            "\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00",  # parameters
        ])
    try:
        sock.sendto(infoReq, (host, port))
        resp2 = sock.recv(65536)
    except socket.timeout as err:
        resp2 = ""
    return "vxworks" in resp2.lower(), resp1, resp2


if __name__ == "__main__":
    import sys
    from pprint import pprint as pr

    pr(scanV1(sys.argv[1]))
    print
    pr(scanV2(sys.argv[2]))