from itertools import cycle
from operator import add, sub


def vigenere(text, key, mode='encode'):
    # `alphabet` is assumed to be a module-level string of allowed characters.
    alen = len(alphabet)
    # Extend the key palindromically (e.g. 'abc' -> 'abcb') and cycle it.
    key = cycle(key + key[-2:0:-1])
    op = add if mode == 'encode' else sub
    return ''.join(alphabet[op(alphabet.index(c), alphabet.index(k)) % alen]
                   for c, k in zip(text, key))
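# A minimal usage sketch (not part of the original snippet): the function
# above assumes a module-level `alphabet`; here we assume lowercase letters.
from string import ascii_lowercase as alphabet

ciphertext = vigenere('attackatdawn', 'lemon', mode='encode')
assert vigenere(ciphertext, 'lemon', mode='decode') == 'attackatdawn'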
def methinks(self, genotype):
    # Fitness: total distance between the target sentence and the candidate
    # genotype, measured as absolute index differences within `printable`.
    difference = 0
    for i in range(self.individual_size):
        difference += abs(printable.index(self.values['sentence'][i])
                          - printable.index(genotype[i]))
    return difference
def generate_account(email_class: str = '@qq.com') -> tuple:
    """
    :param email_class: @qq.com, @gmail.com, ...
    """
    # Account details: username drawn from digits and letters (printable up
    # to '!'), password drawn from all non-whitespace printable characters.
    username = ''.join(random.choice(printable[:printable.index('!')])
                       for _ in range(9))
    password = ''.join(random.choice(printable[:printable.index(' ')])
                       for _ in range(15))
    email = username + email_class
    return username, password, email
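# A minimal usage sketch (not part of the original snippet), assuming the
# module has `import random` and `from string import printable`:
username, password, email = generate_account('@gmail.com')
print(username, password, email)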
def __create__():
    # Account details (see generate_account above): 9-character alphanumeric
    # username, 15-character password from non-whitespace printables.
    username = ''.join(random.choice(printable[:printable.index('!')])
                       for _ in range(9))
    password = ''.join(random.choice(printable[:printable.index(' ')])
                       for _ in range(15))
    email = username + '@qq.com'
    return username, password, email
def predict(self, x_input):
    # Tokenize the input string to 1-based printable indexes, pad, and
    # classify with the trained model.
    url_int_tokens = [[printable.index(x) + 1 for x in x_input if x in printable]]
    X = sequence.pad_sequences(url_int_tokens, maxlen=self.max_len)
    p = self.model.predict(X, batch_size=1)
    return "benign" if p < 0.5 else "malicious"
def load_data(self):
    # Load data
    DATA_HOME = '../data/'
    df = pd.read_csv(DATA_HOME + 'dga_label.csv', encoding='ISO-8859-1', sep=',')

    # Convert each domain string to integer tokens
    # (map URL characters to numbers)
    url_int_tokens = [[printable.index(x) + 1 for x in url if x in printable]
                      for url in df.domain]
    sample_word = df.domain[0]
    sample_word_len = len(url_int_tokens[0])

    # Pad the integer sequences to a maximum length of 77
    max_len = 77
    X = sequence.pad_sequences(url_int_tokens, maxlen=max_len)
    y = np.array(df['class'])

    # Cross-validation
    # X_train, X_test, y_train0, y_test0 = model_selection.train_test_split(
    #     X, y, test_size=0.1, random_state=33)

    # DGA classes 0-20: 21 classes in total
    y = np_utils.to_categorical(y, 21)
    return X, y, sample_word, sample_word_len
def symmetric_decrypt(message, key):
    """
    Uses key values to unshift the characters in the encrypted message
    and return the original plaintext
    Input:  key <printable string>
            message <printable string>
    Output: decrypted_message <printable string>
    """
    key_cypher = [ord(character) for character in key]
    decrypted_message = ""
    for index, character in enumerate(message):
        # Get the index of a character in message
        message_char_index = printable.index(character)
        # Calculate shift amount from the key
        index_shift = key_cypher[index % len(key_cypher)]
        # Left-shift the message character
        decrypted_char_index = (message_char_index - index_shift) % len(printable)
        # Add decrypted character to the message
        decrypted_message += printable[decrypted_char_index]
    return decrypted_message
def symmetric_encrypt(message, key):
    """
    Uses key values to shift the characters in message
    Input:  key <printable string>
            message <printable string>
    Output: encrypted_message <printable string>
    """
    # generate a cypher from the key
    key_cypher = [ord(character) for character in key]
    encrypted_message = ""
    for index, character in enumerate(message):
        # Get the index of a character in message
        message_char_index = printable.index(character)
        # Calculate shift amount from the key
        index_shift = key_cypher[index % len(key_cypher)]
        # Right-shift the message character
        encrypted_char_index = (message_char_index + index_shift) % len(printable)
        # Add encrypted character to the message
        encrypted_message += printable[encrypted_char_index]
    return encrypted_message
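# A minimal round-trip sketch for the pair above (not part of the original
# snippets), assuming both functions share a module with this import:
from string import printable

secret = symmetric_encrypt("hello, world", "s3cret")
assert symmetric_decrypt(secret, "s3cret") == "hello, world"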
def load_data(self, kfold=False):
    # Load data
    DATA_HOME = '../../data/'
    df = pd.read_csv(DATA_HOME + 'urls.csv', encoding='ISO-8859-1', sep=';')

    # Initial data preparation for URLs
    # Step 1: Convert each raw URL string into a list of integers, keeping
    # only characters contained in `printable`
    url_int_tokens = [[printable.index(x) + 1 for x in url if x in printable]
                      for url in df.url]

    # Step 2: Cut URL string at max_len or pad with zeros if shorter
    max_len = 75
    X = sequence.pad_sequences(url_int_tokens, maxlen=max_len)

    # Step 3: Extract labels from df to a numpy array (read the 'label' column)
    target = np.array(df.label)

    if kfold:
        return X, target
    # Simple cross-validation: split the data set into training and test data
    X_train, X_test, target_train, target_test = model_selection.train_test_split(
        X, target, test_size=0.2, random_state=33)
    return X_train, X_test, target_train, target_test
def preprocessing(df, data_num):
    # Optionally extract a sample of data_num rows:
    # df = df.sample(n=data_num)

    # Tokenize each URL string at the character level (string to vector)
    url_int_tokens = [[printable.index(x) + 1 for x in url if x in printable]
                      for url in df.url]

    # Pad the integer sequences to a maximum length of 80
    max_len = 80
    x = sequence.pad_sequences(url_int_tokens, maxlen=max_len)

    # Label data: collapse all non-zero classes to 1 (binary labels)
    label_arr = [0 if i == 0 else 1 for i in df['class']]
    y = np.array(label_arr)

    x_data = pd.DataFrame(x.reshape(data_num, 80))
    y_data = pd.DataFrame(y.reshape(data_num, 1))
    return x_data, y_data
import random
from random import choices
from string import ascii_letters, digits, printable


def gen_text(rules, limit=10):
    s = ""
    i = 0
    while i < len(rules):
        # Determine the pool: token, list/interval, or single character
        if rules[i] == '\\':
            # token
            i += 1
            if rules[i] == "d":
                pool = digits
            elif rules[i] == "w":
                pool = ascii_letters
        elif rules[i] == '[':
            # list or interval
            i += 1
            if rules[i + 1] == '-':
                # interval, e.g. [a-z]
                beg, end = rules[i], rules[i + 2]
                pool = printable[printable.index(beg):printable.index(end) + 1]
                i = i + 3
            else:
                # explicit list of characters
                pool = ""
                while rules[i] != ']':
                    pool += rules[i]
                    i += 1
        else:
            # single character
            pool = rules[i]
        i += 1

        # Determine the number of repetitions
        k = 1
        if i < len(rules):
            if rules[i] == "*":
                k = random.randint(0, limit)
                i += 1
            elif rules[i] == '{':
                # {n} with a single-digit count
                i += 1
                k = int(rules[i])
                i += 2
            elif rules[i] == "+":
                k = random.randint(1, limit)
                i += 1
        s += ''.join(choices(pool, k=k))
    return s
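# A minimal usage sketch (not part of the original snippet): generate a
# string matching a small pattern with the mini-grammar above.
print(gen_text(r'\d{3}-\w+'))  # e.g. '472-xYzQ' (illustrative output)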
def submitCallBack():
    url = E1.get()
    url_int_tokens = [[printable.index(x) + 1 for x in url if x in printable]]
    max_len = 75
    X = sequence.pad_sequences(url_int_tokens, maxlen=max_len)
    model = load_model('model_weights95.json', 'model_weights95.h5')
    target_proba = model.predict(X, batch_size=1)
    messagebox.showinfo("URL Checker Result",
                        " " + url + " is " + print_result(target_proba[0]))
def sequence(string, maxlen=100):
    # Map each printable character to its index, keep the last `maxlen`
    # tokens, and left-pad with zeros if the result is shorter.
    tokens = []
    for c in string:
        if c not in printable:
            continue
        tokens.append(printable.index(c))
    tokens = tokens[-maxlen:]
    if len(tokens) < maxlen:
        tokens = [0] * (maxlen - len(tokens)) + tokens
    return tokens
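# A minimal usage sketch (not part of the original snippet), assuming
# `from string import printable`: shorter inputs are left-padded with zeros,
# and 'a', 'b', 'c' sit at printable indexes 10, 11, 12.
tokens = sequence("abc", maxlen=6)
assert tokens == [0, 0, 0, 10, 11, 12]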
def methinks(self, population):
    # Mutation: with probability `mutation_probability`, nudge a gene one
    # step up or down in `printable`, stepping inward at the ends.
    for individual in population:
        for j in range(self.individual_size):
            if random() < self.mutation_probability:
                if individual['gen'][j] == printable[0]:
                    individual['gen'][j] = printable[1]
                elif individual['gen'][j] == printable[-1]:
                    individual['gen'][j] = printable[-2]
                else:
                    individual['gen'][j] = printable[
                        printable.index(individual['gen'][j]) + choice([-1, 1])]
def methinks(self, individual):
    # Neighborhood: one neighbor per position, moving that gene a single
    # step in `printable` (at the ends, step inward).
    neighbors = []
    for i in range(self.individual_size):
        neighbor = {'gen': deepcopy(individual['gen'])}
        if neighbor['gen'][i] == printable[0]:
            neighbor['gen'][i] = printable[1]
        elif neighbor['gen'][i] == printable[-1]:
            neighbor['gen'][i] = printable[-2]
        else:
            neighbor['gen'][i] = printable[
                printable.index(neighbor['gen'][i]) + choice([-1, 1])]
        neighbors.append(neighbor)
    return neighbors
def construct_file(in_file):
    """
    Builds the file from the encryption key and prints it to the file
    """
    # The offset changes where the alphabet starts, so to speak, making the
    # file harder to decipher.
    offset_seed = printable[randint(0, len(printable) - 1)]
    generate_seed()
    write_to_file = "#encrypted "
    write_to_file += offset_seed
    write_to_file += "".join(str(two_chars)
                             for char_sequence in seed
                             for two_chars in char_sequence)
    for letter in in_file:
        # Wrap around the end of `printable` when the combined index overflows
        combined = printable.index(letter) + printable.index(offset_seed)
        if combined >= len(printable):
            write_to_file += "".join(seed[combined - len(printable)])
        else:
            write_to_file += "".join(seed[combined])
    return write_to_file
def processPartition(self, server_interface, arg_reader, res_writer):
    server_interface.log(
        "Split a Varchar and represent it using ASCII indexes of the "
        "first 1280 elements")
    while True:
        x = "" if arg_reader.isNull(0) else arg_reader.getString(0)
        n = len(x)
        # Emit the printable index of each of the first 1280 characters,
        # padding with zeros past the end of the string.
        for i in range(1280):
            idx = printable.index(x[i]) if i < n else 0
            res_writer.setInt(i, idx)
        res_writer.next()
        if not arg_reader.next():
            break
def detect_url(self, url):
    model = self.lstm_conv()
    model.load_weights('./model/model_convolutional.h5')
    url_int_tokens = [[printable.index(x) + 1 for x in url if x in printable]]
    X = sequence.pad_sequences(url_int_tokens, maxlen=75)
    target_proba = model.predict(X, batch_size=1)
    clear_session()
    if target_proba[0] > 0.5:
        return "malicious"
    else:
        return "benign"
def init_string(raw_text):
    """
    Read in the string provided by main, determine the seed offset
    and delete extraneous characters
    """
    global seed
    seed = []
    global in_file
    in_file = raw_text
    # Skip past the "#encrypted" marker
    in_file = in_file[10:]
    global offset
    offset = printable.index(in_file[1])
    in_file = in_file[0:]  # keeps the full remaining string (no-op slice)
def crypt(self):
    cryptt = ""
    try:
        file = open(self.filename.get(), "r")
    except OSError:
        print("You didn't select a decrypt file")
        return False
    crypt = file.read().split("_")
    inputt = self.decrypt.get()
    # Substitute each input character with the cipher chunk stored at its
    # printable index.
    for i in inputt:
        c = printable.index(i)
        cryptt += crypt[c]
    self.encrypt.set(cryptt)
    self.decrypt.set("")
def submitCallBack():
    url = E1.get()
    url_int_tokens = [[printable.index(x) + 1 for x in url if x in printable]]
    max_len = 75
    X = sequence.pad_sequences(url_int_tokens, maxlen=max_len)
    model = load_model('model_weights95.json', 'model_weights95.h5')
    target_proba = model.predict(X, batch_size=1)

    # Cross-check the model's verdict against PhishTank
    p = ap.PhishTank()
    result = p.check(url)
    if result.valid == "True" and print_result(target_proba[0]) == "False":
        messagebox.showinfo("Result", " " + url + " is a phish!")
    elif result.valid:
        messagebox.showinfo("Result", " " + url + " is a phish!")
    else:
        messagebox.showinfo("Result", " " + url + " is not a phish!")
def load_data_binary():
    """
    Load and pre-process data.
    1) Load data from dir
    2) Tokenizing
    3) Padding
    return train and test data
    """
    # Load data
    data_home = '../data/'
    df = pd.read_csv(data_home + 'url_label.csv', encoding='ISO-8859-1', sep=',')

    # Tokenize each URL string at the character level (string to vector)
    url_int_tokens = [[printable.index(x) + 1 for x in url if x in printable]
                      for url in df.url]
    # Equivalent Tokenizer-based alternative:
    # tokenizer = Tokenizer(filters='', lower=False, char_level=True)
    # tokenizer.fit_on_texts(df.url)
    # url_int_tokens = tokenizer.texts_to_sequences(df.url)

    # Pad the integer sequences to a maximum length of 80
    max_len = 80
    x = sequence.pad_sequences(url_int_tokens, maxlen=max_len)

    # Binary labels: collapse all non-zero classes to 1
    label_arr = [0 if i == 0 else 1 for i in df['class']]
    y = np.array(label_arr)

    # Cross-validation
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        x, y, test_size=0.1, random_state=33)
    return x_train, x_test, y_train, y_test
def predictlist():
    if not model:
        print('Train the model first')
        return 'No model here to use'
    try:
        # Cut URL strings at max_len or pad with zeros if shorter
        max_len = 400
        json_ = request.json
        print(json_)
        print(len(json_))
        u = []
        result = []
        for i in range(0, len(json_) - 1):
            url = json_[i]['url']
            # Strip scheme and "www." prefixes
            l = re.compile(r"https?://(www\.)?")
            r = re.compile(r"(www\.)?")
            urls = [l.sub('', url).strip().strip('/')]
            urls = [r.sub('', urls[0])]
            print(urls)
            # Convert the URL to integer tokens
            url_int_tokens = [[printable.index(x) + 1 for x in url if x in printable]
                              for url in urls]
            u.append(url_int_tokens)
            print(u)
            query = sequence.pad_sequences(url_int_tokens, maxlen=max_len,
                                           padding='post', truncating='post')
            with graph.as_default():
                prediction = model.predict(query)
                print(prediction[0])
                pred_proba = model.predict_proba(query)
                print(pred_proba)
            if prediction[0] > 0.50:
                result.append({'bad': str(pred_proba[0])})
            else:
                result.append({'good': str(pred_proba[0])})
        return jsonify(result)
    except Exception:
        return jsonify({'trace': traceback.format_exc()})
def index():
    form = LoginForm()
    if form.validate_on_submit():
        url = form.url.data
        # Strip scheme and "www." prefixes
        l = re.compile(r"https?://(www\.)?")
        r = re.compile(r"(www\.)?")
        urls = [l.sub('', url).strip().strip('/')]
        urls = [r.sub('', urls[0])]

        # Convert the URL to integer tokens
        url_int_tokens = [[printable.index(x) + 1 for x in url if x in printable]
                          for url in urls]

        # Cut URL string at max_len or pad with zeros if shorter
        max_len = 400
        X = sequence.pad_sequences(url_int_tokens, maxlen=max_len, padding='post')
        with graph.as_default():
            prediction = model.predict(X)
        if prediction[0] > 0.50:
            return render_template("success.html", url=form.url.data,
                                   status="Malicious", value=prediction[0])
        return render_template("success.html", url=form.url.data,
                               status="Not Malicious", value=prediction[0])
    return render_template('index.html', form=form)
if Path(fileWeights).is_file():
    os.remove(fileWeights)
model.save_weights(fileWeights)

with tf.device("/GPU:0"):
    # Load data
    DATA_HOME = '../data/'
    df = pd.read_csv(DATA_HOME + 'dga_label_shuffle.csv',
                     encoding='ISO-8859-1', sep=',')

    # Convert each domain string to integer tokens
    # (map URL characters to numbers)
    url_int_tokens = [[printable.index(x) + 1 for x in url if x in printable]
                      for url in df.domain]

    # Pad the integer sequences to a maximum length of 74
    max_len = 74
    X = sequence.pad_sequences(url_int_tokens, maxlen=max_len)
    y = np.array(df['class'])

    # Cross-validation
    X_train, X_test, y_train0, y_test0 = model_selection.train_test_split(
        X, y, test_size=0.2, random_state=33)

    # DGA classes 0-20: 21 classes in total
    y_train = np_utils.to_categorical(y_train0, 21)
import telnetlib
from string import printable

tn = telnetlib.Telnet('mercury.picoctf.net', 58251)
response = tn.read_until(b'me: ').split(b'\n')
flag = response[0].split(b': ')[1]
known = b''
cts = []
# Candidate bytes: every printable character up to and including '}'
printable = [a.encode() for a in printable[0:printable.index('}') + 1]]


def remove_knowns(res, cts):
    # Strip already-recovered ciphertext chunks from the response
    for ct in cts:
        if ct in res:
            idx = res.index(ct)
            res = res[:idx] + res[idx + len(ct):]
    return res


# Try until the last recovered char is '}' (byte 125)
while known == b'' or known[-1] != 125:
    found = False
    # Write and read all candidates at once, as it's faster
    tn.write(b'\n'.join([known + char for char in printable]) + b'\n')
    for char in printable:
        tn.read_until(b': ')
        ct = remove_knowns(tn.read_until(b': ').split(b'\n')[0], cts)
from string import printable as _printable
from random import choice
import os
import argparse

# Slice bounds into `string.printable`: alphanumerics end at 'Z',
# symbols end at '~'.
_symbol_index = _printable.index('~') + 1
_alnum_index = _printable.index('Z') + 1

_curr_dir = os.path.dirname(__file__)
with open(os.path.join(_curr_dir, 'words.txt')) as f:
    _words = list(filter(str.isalpha, f.read().splitlines()))


def pword(words=4, sep=' '):
    return sep.join(choice(_words) for _ in range(words))


def rand_pass(length=8, use_symbols=True):
    index = [_alnum_index, _symbol_index][use_symbols]
    return ''.join(choice(_printable[:index]) for _ in range(length))


def main():
    parser = argparse.ArgumentParser(description='Pass some words')
    parser.add_argument('--count', '-c', type=int, default=5)
    parser.add_argument('--words', '-w', type=int, default=pword.__defaults__[0])
    parser.add_argument('--separator', '-s', default=pword.__defaults__[1])
    args = parser.parse_args()
    for _ in range(args.count):
        print(pword(args.words, args.separator))


if __name__ == '__main__':
    main()
def run():
    # Load URL data
    file = "url_data.csv"
    url_data = pd.read_csv(file)

    # Data preprocessing: convert characters in each URL string
    # (contained in `printable`) into integers
    url_int_tokens = [[printable.index(char) + 1 for char in url if char in printable]
                      for url in url_data.url]

    # Limit each token sequence to a maximum length, padding with zeros if shorter
    maximum_data_length = 100
    x = sequence.pad_sequences(url_int_tokens, maxlen=maximum_data_length)

    # Extract labels from the dataframe into a numpy array
    y = np.array(url_data.isMalicious)
    print('Matrix dimensions of X: ', x.shape, 'Vector dimension of target: ', y.shape)

    # Cross-validation: split the data set into training and test data
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        x, y, test_size=0.1, random_state=33)

    # Model parameters
    maximum_number_of_epoch = 1
    batch_size = 64
    embedding_dimension = 128
    maximum_vocabulary_length = 100
    lstm_output_size = 32

    model = cnn_lstm(maximum_data_length, embedding_dimension,
                     maximum_vocabulary_length, lstm_output_size)
    model.fit(x_train, y_train, epochs=maximum_number_of_epoch, batch_size=batch_size)

    # ------------------------------ prepare plotting parameters ------------------------------
    prediction_lstm = model.predict(x_test).ravel()
    false_positive_lstm, true_positive_lstm, threshold_lstm = roc_curve(y_test, prediction_lstm)
    auc_lstm = auc(false_positive_lstm, true_positive_lstm)

    random_forest_classifier = RandomForestClassifier(max_depth=3, n_estimators=10)
    random_forest_classifier.fit(x_train, y_train)
    prediction_rf = random_forest_classifier.predict_log_proba(x_test)[:, 1]
    false_positive_rf, true_positive_rf, threshold_rf = roc_curve(y_test, prediction_rf)
    auc_rf = auc(false_positive_rf, true_positive_rf)

    # ------------------------------ graph plotting ------------------------------
    plt.figure(1)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.plot(false_positive_lstm, true_positive_lstm,
             label='CNN_LSTM (area = {:.3f})'.format(auc_lstm))
    plt.plot(false_positive_rf, true_positive_rf,
             label='RF (area = {:.3f})'.format(auc_rf))
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title('ROC curve')
    plt.legend(loc='best')
    plt.show()

    # Zoomed-in view of the upper-left corner
    plt.figure(2)
    plt.xlim(0, 0.2)
    plt.ylim(0.8, 1)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.plot(false_positive_lstm, true_positive_lstm,
             label='Keras (area = {:.3f})'.format(auc_lstm))
    plt.plot(false_positive_rf, true_positive_rf,
             label='RF (area = {:.3f})'.format(auc_rf))
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title('ROC curve (zoomed in at top left)')
    plt.legend(loc='best')
    plt.show()

    # ------------------------------ model evaluation ------------------------------
    # Final evaluation of the cnn_lstm model
    print("final evaluation of the cnn_lstm model")
    scores = model.evaluate(x_test, y_test, verbose=0)
    print("Accuracy: %.2f%%" % (scores[1] * 100))
    AUC = round(auc_lstm * 100, 2)
    Accuracy = round(scores[1] * 100, 2)
    Recall = round(scores[2] * 100, 2)
    Precision = round(scores[3] * 100, 2)
    F1score = round(scores[4] * 100, 2)

    # Tabulate the accuracy metrics
    t = Texttable()
    t.add_rows([['AUC', 'Accuracy', 'Recall', 'Precision', 'F1-score'],
                [AUC, Accuracy, Recall, Precision, F1score]])
    print(t.draw())

    # ------------------------------ save model ------------------------------
    # Serialize model to JSON
    model_json = model.to_json()
    with open("cnn_lstm_model.json", "w") as json_file:
        json_file.write(model_json)
    # Serialize weights to HDF5
    model.save_weights("cnn_lstm_model.h5")
    print("Saved model to disk")
def Vector(url):
    max_len = 75
    url_int_tokens = [[printable.index(x) + 1 for x in url if x in printable]]
    X = sequence.pad_sequences(url_int_tokens, maxlen=max_len)
    return X
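# A minimal usage sketch (not part of the original snippet), assuming
# `from string import printable` and Keras' `sequence` module are imported
# as the function above requires:
X = Vector("example.com/login")
print(X.shape)  # (1, 75): one tokenized URL, zero-padded on the left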
def submitCallBack():
    url = E1.get()
    url_int_tokens = [[printable.index(x) + 1 for x in url if x in printable]]
    max_len = 75
    X = sequence.pad_sequences(url_int_tokens, maxlen=max_len)
    model = load_model('model_weights95.json', 'model_weights95.h5')
    target_proba = model.predict(X, batch_size=1)

    import base64
    import requests

    class PhishTankError(Exception):
        pass

    class Result():
        def __init__(self, response):
            self.url = response.get('url', None)
            self.in_database = response.get('in_database', None)
            self.phish_id = response.get('phish_id', None)
            self.phish_detail_page = response.get('phish_detail_page', None)
            self.verified = response.get('verified', None)
            self.valid = response.get('valid', None)

        def __phish(self):
            return bool(self.valid)

    class PhishTank():
        __apikey = 'c2450ce424e2ad46756de2e9f0d6572e4053d119661baa48679e547006ba5662'

        def __init__(self,
                     api_url='http://checkurl.phishtank.com/checkurl/',
                     apikey='c2450ce424e2ad46756de2e9f0d6572e4053d119661baa48679e547006ba5662'):
            self.__apikey = apikey
            self._api_url = api_url

        def check(self, url):
            post_data = {
                'url': base64.b64encode(url.encode("utf-8")),
                'format': 'json',
                'app_key': self.__apikey,
            }
            response = requests.post(self._api_url, data=post_data)
            data = response.json()
            if 'errortext' in data.keys():
                raise PhishTankError(data['errortext'])
            return Result(data['results'])

    p = PhishTank()
    result = p.check(url)
    if result.valid == "True" and print_result(target_proba[0]) == "False":
        messagebox.showinfo("Result", " " + url + " is a phish!")
    elif result.valid:
        messagebox.showinfo("Result", " " + url + " is a phish!")
    else:
        messagebox.showinfo("Result", " " + url + " is not a phish!")
def main():
    url = sys.argv[1]

    def load_model(fileModelJSON, fileWeights):
        with open(fileModelJSON, 'r') as f:
            model_json = json.load(f)
        model = model_from_json(model_json)
        model.load_weights(fileWeights)
        return model

    def print_result(proba):
        # "False" means the model considers the URL malicious
        return "False" if proba > 0.5 else "True"

    url_int_tokens = [[printable.index(x) + 1 for x in url if x in printable]]
    max_len = 75
    X = sequence.pad_sequences(url_int_tokens, maxlen=max_len)
    model = load_model("C:\\xampp\\htdocs\\1\\model_weights95.json",
                       "C:\\xampp\\htdocs\\1\\model_weights95.h5")
    target_proba = model.predict(X, batch_size=1)

    import base64
    import requests

    class PhishTankError(Exception):
        pass

    class Result():
        def __init__(self, response):
            self.url = response.get('url', None)
            self.in_database = response.get('in_database', None)
            self.phish_id = response.get('phish_id', None)
            self.phish_detail_page = response.get('phish_detail_page', None)
            self.verified = response.get('verified', None)
            self.valid = response.get('valid', None)

        def __phish(self):
            return bool(self.valid)

    class PhishTank():
        __apikey = 'c2450ce424e2ad46756de2e9f0d6572e4053d119661baa48679e547006ba5662'

        def __init__(self,
                     api_url='http://checkurl.phishtank.com/checkurl/',
                     apikey='c2450ce424e2ad46756de2e9f0d6572e4053d119661baa48679e547006ba5662'):
            self.__apikey = apikey
            self._api_url = api_url

        def check(self, url):
            post_data = {
                'url': base64.b64encode(url.encode("utf-8")),
                'format': 'json',
                'app_key': self.__apikey,
            }
            response = requests.post(self._api_url, data=post_data)
            data = response.json()
            if 'errortext' in data.keys():
                raise PhishTankError(data['errortext'])
            return Result(data['results'])

    p = PhishTank()
    result = p.check(url)
    if result.valid == "True" and print_result(target_proba[0]) == "False":
        print("Malicious")
    elif result.valid:
        print("Malicious")
    else:
        print("SAFE")
    print_result(target_proba[0])
def encode(plaintext, key):
    # Substitution cipher: `key` is a permutation of `printable`; map each
    # plaintext character to the key character at the same index.
    return ''.join(key[printable.index(char)] for char in plaintext)
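# A companion sketch (not part of the original snippet): the inverse of the
# substitution encoder above, assuming `key` is a permutation of `printable`.
from string import printable


def decode(ciphertext, key):
    # Reverse lookup: find each ciphertext character's position in the key.
    return ''.join(printable[key.index(char)] for char in ciphertext)


# Round trip with a reversed-alphabet key:
key = printable[::-1]
assert decode(encode("hello", key), key) == "hello"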
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from string import printable
from keras.preprocessing import sequence  # for pad_sequences

SEED = 1234
torch.manual_seed(SEED)
# torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

data = pd.read_csv('data.csv', encoding='latin-1', error_bad_lines=False)
data.label = [0 if i == 'good' else 1 for i in data.label]
print('Data size: ', data.shape[0])

url_tokens = [[printable.index(x) + 1 for x in url if x in printable]
              for url in data.url]
max_len = 75
X = sequence.pad_sequences(url_tokens, maxlen=max_len)
Y = np.array(data['label'])
print('Matrix dimensions of X: ', X.shape, 'Vector dimension of target: ', Y.shape)


class cnn(nn.Module):
    def __init__(self, vocab_size, max_num_hidden_layers, embedding_dim,
                 n_classes, n_filters, filter_size, output_dim, dropout,
                 batch_size=1, b=0.99, n=0.01, s=0.2, use_cuda=False):
        super(cnn, self).__init__()
        if torch.cuda.is_available() and use_cuda:
            print("Using CUDA :]")
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() and use_cuda else "cpu")
from base64 import b64decode
from string import printable


def base64decode_filter(string):
    # Base64-decode (assuming the decoded payload is text), then shift each
    # printable character back by a position-dependent offset (i + 13).
    return ''.join(
        printable[(printable.index(c) - (i + 13)) % len(printable)]
        if c in printable else c
        for i, c in enumerate(b64decode(string).decode())
    )
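# An inverse sketch (not part of the original snippet): an encoder matching
# the filter above, shifting forward by (i + 13) before base64-encoding.
from base64 import b64encode


def base64encode_filter(string):
    shifted = ''.join(
        printable[(printable.index(c) + (i + 13)) % len(printable)]
        if c in printable else c
        for i, c in enumerate(string)
    )
    return b64encode(shifted.encode()).decode()


assert base64decode_filter(base64encode_filter("attack at dawn")) == "attack at dawn"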