def decrypt():
    # It's important to note that we do not check for a successful decryption or correct password.
    # If intercepted, we do not want to give attackers the ability to discern correct from incorrect attempts.
    print("Please enter path to file")
    file_path = u.read_line()
    print("Please enter Encryption pass phrase")
    password = u.read_line()

    # generating AES cipher using inputted password
    key = c.create_key(password)

    # turning .epub into .zip for ease of use
    zip_name = u.preprocess(file_path)

    with zipfile.ZipFile(zip_name, mode='r') as myzip:
        for name in myzip.namelist():
            # decrypt only the content files; copy everything else through unchanged
            if name.endswith(".xhtml") or name.endswith(".css") or name.endswith(".opf") or name.endswith(".ncx"):
                with myzip.open(name) as in_file:
                    contents = in_file.read()
                out = c.decrypt_AES(key, contents)
                u.update_zip(zip_name, name, out)
            else:
                with myzip.open(name) as in_file:
                    contents = in_file.read()
                u.update_zip(zip_name, name, contents)

    u.postprocess(zip_name)
def predict_tweets(self, docs, predict_log_p=False):
    """
    Take in a list of docs and create a feature array of size [nexamples] x [nfeatures].
    This can be a sparse matrix. This matrix/array is then sent to predict and log_likelihood.
    """
    nfeatures = len(self.features_)
    nexamples = len(docs)
    X = sparse.lil_matrix((nexamples, nfeatures), dtype=float)
    stop_words = util.getStopWords()

    iexample = -1
    for tweet in docs:
        iexample += 1
        tweet = util.preprocess(tweet)
        # keep alphanumeric tokens of length >= 3 that are not stop words
        words = [w for w in tweet.split()
                 if (len(w) >= 3 and w not in stop_words and re.search(r'^[a-zA-Z][a-zA-Z0-9]*$', w))]
        for f in words:
            if f in self.features_:
                X[iexample, self.features_.index(f)] += 1

    if not predict_log_p:
        return self.predict(X)
    else:
        return self.predict_logprob(X)
def build(self):
    print("BUILDING MODEL...")

    if self.is_built:
        return

    self.is_built = True

    generator_factory = self.create_generator()
    discriminator_factory = self.create_discriminator()
    smoothing = 1
    seed = self.options.seed
    kernel = 4

    self.input_rgb = tf.placeholder(tf.float32, shape=(None, None, None, 3), name='input_rgb')
    self.input_color = preprocess(self.input_rgb, colorspace_in=COLORSPACE_RGB, colorspace_out='LAB')
    self.input_gray = tf.image.rgb_to_grayscale(self.input_rgb)

    generator = generator_factory.create(self.input_gray, kernel, seed)
    discriminator_real = discriminator_factory.create(tf.concat([self.input_gray, self.input_color], 3), kernel, seed)
    discriminator_fake = discriminator_factory.create(tf.concat([self.input_gray, generator], 3), kernel, seed, reuse_variables=True)

    # generator tries to make the discriminator label fake samples as real
    generator_ce = tf.nn.sigmoid_cross_entropy_with_logits(logits=discriminator_fake, labels=tf.ones_like(discriminator_fake))
    # discriminator is trained towards 1 (with label smoothing) on real pairs and 0 on fake pairs
    discriminator_real_ce = tf.nn.sigmoid_cross_entropy_with_logits(logits=discriminator_real, labels=tf.ones_like(discriminator_real) * smoothing)
    discriminator_fake_ce = tf.nn.sigmoid_cross_entropy_with_logits(logits=discriminator_fake, labels=tf.zeros_like(discriminator_fake))

    self.dis_loss_real = tf.reduce_mean(discriminator_real_ce)
    self.dis_loss_fake = tf.reduce_mean(discriminator_fake_ce)
    self.dis_loss = tf.reduce_mean(discriminator_real_ce + discriminator_fake_ce)

    self.gen_loss_gan = tf.reduce_mean(generator_ce)
    self.gen_loss_l1 = tf.reduce_mean(tf.abs(self.input_color - generator)) * 100.0
    self.gen_loss = self.gen_loss_gan + self.gen_loss_l1

    self.sampler = tf.identity(generator_factory.create(self.input_gray, kernel, seed, reuse_variables=True), name='output')
    self.accuracy = pixelwise_accuracy(self.input_color, generator, 'LAB', 2.0)
    self.learning_rate = tf.constant(self.options.lr)

    if self.options.lr_decay and self.options.lr_decay_rate > 0:
        self.learning_rate = tf.maximum(1e-6, tf.train.exponential_decay(
            learning_rate=self.options.lr,
            global_step=self.global_step,
            decay_steps=self.options.lr_decay_steps,
            decay_rate=self.options.lr_decay_rate))

    self.gen_train = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate,
        beta1=0
    ).minimize(self.gen_loss, var_list=generator_factory.var_list)

    self.dis_train = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate / 10,
        beta1=0
    ).minimize(self.dis_loss, var_list=discriminator_factory.var_list, global_step=self.global_step)

    self.saver = tf.train.Saver()
def storyline():
    # _items = db.appdb.find()
    # items = [items for items in _items]
    # processing text to return data: {line: text, pos_line: {n: [], v: [], adj: [{n: JJ}, {n: JJ}], cd: [{n: CD}], pronoun_line: text}}
    # preprocess also makes sure that verbs are written with _number in the increasing order in which they appear
    if request.method == 'POST':
        data, charDictList = utility.preprocess(request.json)
    else:
        line = {"data": {"line": request.args.get("line")}, "animCharDict": {}}
        linejson = jsonify(line)
        data, charDictList = utility.preprocess(linejson.json)
    if data is None:
        return jsonify({"error": "Text is empty, I wanna hear your story!"})
    result = par.parse(data, charDictList, db)
    return json.dumps(result, default=str)
def find_intent_match(self, responses, user_message):
    if len(responses) == 0:
        return self.idk_response(user_message)

    processed_message = Counter(preprocess(user_message))
    processed_responses = [Counter(preprocess(response)) for response in responses]
    similarity_list = [compare_overlap(processed_message, rep) for rep in processed_responses]

    # If none of the responses really fit what the user is asking:
    if max(similarity_list) < 1:
        return self.idk_response(user_message)

    response_index = similarity_list.index(max(similarity_list))
    return responses[response_index]
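# NOTE (illustration, not from the original project): preprocess and compare_overlap
# are project helpers whose implementations are not shown above. A minimal sketch of
# what they plausibly do (bag-of-words token overlap), assuming whitespace tokenization:
from collections import Counter

def preprocess(text):
    # hypothetical stand-in: lowercase and split on whitespace
    return text.lower().split()

def compare_overlap(message_counts, response_counts):
    # hypothetical stand-in: count message tokens that also appear in the response
    return sum((message_counts & response_counts).values())

# A response sharing no tokens with the user message scores 0, which is why
# find_intent_match falls back to idk_response when max(similarity_list) < 1.
print(compare_overlap(Counter(preprocess("when will my package arrive")),
                      Counter(preprocess("your package should arrive tomorrow"))))  # -> 2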
def encrypt():
    print("Please enter path to file")
    file_path = u.read_line()
    print("Please enter Encryption pass phrase")
    password = u.read_line()

    manifest = u.get_epub_info(file_path)
    actual_content = []

    # generating AES cipher using inputted password
    key = c.create_key(password)

    # turning .epub into .zip for ease of use
    zip_name = u.preprocess(file_path)

    with zipfile.ZipFile(zip_name, mode='r') as myzip:
        for name in myzip.namelist():
            if name.startswith("OEBPS/"):
                actual_content.append(name[6:])

            # encrypt only the files that require encryption, ignore meta and image files
            if name.endswith(".xhtml") or name.endswith(".css") or name.endswith(".opf") or name.endswith(".ncx"):
                with myzip.open(name) as in_file:
                    contents = in_file.read()
                out = c.encrypt_AES(key, contents)
                u.update_zip(zip_name, name, out)
            else:
                with myzip.open(name) as in_file:
                    contents = in_file.read()
                u.update_zip(zip_name, name, contents)

    # Compare the file manifest to files found
    for item in manifest:
        if item in actual_content:
            actual_content.remove(item)

    # Must remove this file as it isn't tracked in the actual manifest
    # actual_content.remove("package.opf")
    # actual_content.remove("")

    if len(actual_content) > 0:
        # print(len(actual_content)+" Files not listed in manifest")
        print("[WARN] Following items not listed in manifest")
        for item in actual_content:
            print(item)

    u.postprocess(zip_name)
def solve(num_wizards, num_constraints, wizards, constraints):
    """
    Write your algorithm here.
    Input:
        num_wizards: Number of wizards
        num_constraints: Number of constraints
        wizards: An array of wizard names, in no particular order
        constraints: A 2D-array of constraints, where constraints[0] may take the form ['A', 'B', 'C']
    Output:
        An array of wizard names in the ordering your algorithm returns
    """
    constraints = utility.preprocess(wizards, num_constraints, constraints)
    opt, name = utility.find_optimizable(constraints)
    print("optimizable wizards: ", len(name))
    print("related constraints: ", len(opt))
    output = utility.strategy1(num_wizards, wizards, constraints)
    order = [wizards[o] for o in output]
    return order
import gc
import time

from lightgbm import LGBMClassifier
from sklearn.ensemble import (AdaBoostClassifier, ExtraTreesClassifier,
                              GradientBoostingClassifier, RandomForestClassifier)
from sklearn.linear_model import SGDClassifier
from xgboost import XGBClassifier

from ensemble import Ensemble
from utility import read_data, preprocess

start = time.time()

# Read in our input data
df_train, df_test = read_data()
df_train, df_test = preprocess(df_train, df_test)

id_test = df_test['id'].values
X = df_train.drop(['id', 'target'], axis=1)
X_test = df_test[X.columns].values
X = X.values
y = df_train['target'].values

# Free the raw frames once the arrays have been extracted
df_train = None
df_test = None

print("Loaded and prepared data in %.2f seconds" % (time.time() - start))
gc.collect()
def read_data(dataset_path):
    print('Reading Data...')
    data = pd.read_csv(dataset_path)
    X, y = data.data.values, data.intent.values
    return X, y

def pipeline_sent_enc(text):
    return embed([text]).numpy()[0]

if __name__ == '__main__':
    dataset = read_data('dataset/mainModel.csv')
    dataset = preprocess(dataset, {'tfidf': tfidf, 'tokenizer': tokenizer})
    features = {
        # 'lstm_features' : pipeline_lstm_feature,
        'sent_enc': pipeline_sent_enc,
        # 'glove' : pipeline_avg_glove,
        # 'idf_glove' : pipeline_idf_glove,
        # 'tfidf' : pipeline_tfidf_vectorize,
        # 'elmo' : pipeline_elmo
    }
    features = featurize_and_split(dataset, features)
    """
    rf = {
        'model' : OneVsRestClassifier(RandomForestClassifier()),
import re
import math
import sys
from nltk.stem import PorterStemmer
from utility import preprocess, get_dic_term, remove_stop
import time

indexfile = sys.argv[1]
outfile = open("results.ranked.txt", 'w')
queryfile = open(sys.argv[2])
dic = preprocess(indexfile)  # read index file into dict
ps = PorterStemmer()

# Function for calculating the TF-IDF score of a document against a query
def tfidf_score(queries, docID, dictionary):
    score = 0
    for i, phrase in enumerate(queries):
        terms_index = get_dic_term(phrase, dictionary, processed=True)
        df_fren = len(terms_index)
        if docID in terms_index.keys():
            tf_fren = len(terms_index[docID])
        else:
            tf_fren = 0
        if df_fren == 0 or tf_fren == 0:
            s = 0
        else:
            s = (1 + math.log(tf_fren, 10)) * math.log((5000 / df_fren), 10)
        score += s
    return score
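# NOTE (illustration, not from the original project): the weight used above is
# (1 + log10(tf)) * log10(N / df) with an assumed collection size of N = 5000.
# A self-contained version of that single-term weight:
import math

def term_weight(tf, df, n_docs=5000):
    # zero when the term is absent from the document or the collection
    if tf == 0 or df == 0:
        return 0.0
    return (1 + math.log(tf, 10)) * math.log(n_docs / df, 10)

# A term appearing 3 times in a document and in 50 of 5000 documents:
print(term_weight(3, 50))  # (1 + log10(3)) * log10(100) = about 2.95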
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# Train Module
# Module to train a CNN model on character data.

import numpy as np

import cnn
import utility

input_arguments = utility.parse_input_arguments(module="train")
images, labels = utility.load_data(input_arguments.image_path)
class_count = len(np.unique(labels))
images, labels = utility.preprocess(images, labels)
images, labels = utility.shuffle(images, labels)
x_train, x_test, y_train, y_test = utility.split(images, labels, test_size=0.2)

cnn = cnn.CNN(x_train.shape[1], x_train.shape[2], x_train.shape[3], class_count)
cnn.summary()
cnn.train(x_train, y_train, epochs=input_arguments.epochs, batch_size=input_arguments.batch_size,
          validation_split=input_arguments.validation_split, output_path=input_arguments.output_path)
cnn.test(x_test, y_test, output_path=input_arguments.output_path)
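# NOTE (illustration, not from the original project): utility.preprocess is not shown
# here. A hypothetical sketch of a typical implementation for this kind of pipeline,
# assuming it scales pixels to [0, 1] and maps labels to consecutive integer ids:
import numpy as np

def preprocess(images, labels):
    # hypothetical stand-in for the project's utility.preprocess
    images = images.astype(np.float32) / 255.0    # scale pixel values to [0, 1]
    classes = np.unique(labels)                   # sorted unique label values
    label_ids = np.searchsorted(classes, labels)  # map each label to its class index
    return images, label_ids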
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# Train Module
# Module to train a DCGAN model on image data.

import dcgan
import utility

input_arguments = utility.parse_input_arguments(module="train")
images = utility.load_images(input_arguments.image_path)
images = utility.preprocess(images)
utility.shuffle(images)

dcgan = dcgan.DCGAN(images.shape[1], images.shape[2], images.shape[3])
dcgan.summary()
dcgan.train(images, epochs=input_arguments.epochs, batch_size=input_arguments.batch_size,
            saving_frequency=input_arguments.saving_frequency, output_path=input_arguments.output_path)