def split_and_train(path_to_df, text_field, label_field, split_params=None, save_dir="./",
                    preprocessing_function=None, additional_fields_and_preps=None,
                    additional_data_paths=None, hyperparams=None, log_dir="./", use_gpu=False,
                    postfix="", verbose=True, remove_extra_labels=True):
    """
    Split dataframe with the given params into train and test, then train a model on the train split.

    :param path_to_df: str, path to csv or parquet file
    :param text_field: str, column of the dataframe in which is the text that should be classified
    :param label_field: str, column of the dataframe in which is the label of the corresponding text
    :param split_params: dict, input format: {"seed": int, default 17, "fraction": float, default: 0.1}
    :param save_dir: str, directory to save the txt files
    :param preprocessing_function: function, function to apply on text_field column
    :param additional_fields_and_preps: dict. Dictionary in the following format
        {field_name1: preprocessing_function1, field_name2: preprocessing_function2}
        to enable custom preprocessing for different fields
    :param additional_data_paths: list of str, paths of fasttext format additional data to concat
        with train file
    :param hyperparams: dict, all hyperparams for train_supervised
    :param log_dir: str, directory to save the training files and the model
    :param use_gpu: bool, use gpu for training
    :param postfix: str, postfix to add to train and validation files
    :param verbose: bool
    :param remove_extra_labels: bool, remove datapoints with labels which appear in
        additional_data_paths but not in train_data_path
    :return: object. FastTextModel
    """
    # Fix for mutable default arguments: the old {} / [] defaults were shared
    # across calls; use None sentinels and create fresh containers per call.
    split_params = {} if split_params is None else split_params
    additional_fields_and_preps = {} if additional_fields_and_preps is None else additional_fields_and_preps
    additional_data_paths = [] if additional_data_paths is None else additional_data_paths
    hyperparams = {} if hyperparams is None else hyperparams

    train_data_path, val_data_path = \
        train_val_split_from_df(path_to_df=path_to_df, text_field=text_field,
                                label_field=label_field, split_params=split_params,
                                save_dir=save_dir, preprocessing_function=preprocessing_function,
                                verbose=verbose,
                                additional_fields_and_preps=additional_fields_and_preps,
                                postfix=postfix)
    if verbose:
        print("train path {}".format(train_data_path))
        print("val path {}".format(val_data_path))
    hypers_new = hyperparams.copy()
    # The result dir name encodes the preprocessing function (and any extra
    # fields) so runs with different preprocessing do not collide on disk.
    if additional_fields_and_preps:
        hypers_new["result_dir"] = os.path.join(
            log_dir, "{}_{}".format(hash_function(preprocessing_function),
                                    "_".join(additional_fields_and_preps.keys())))
    else:
        hypers_new["result_dir"] = os.path.join(log_dir, hash_function(preprocessing_function))
    hypers_new["use_gpu"] = int(use_gpu)
    hypers_new["split_and_train_params"] = {
        "df_path": path_to_df,
        "split_params": split_params,
        "additional_fields_and_preps": additional_fields_and_preps,
        "remove_extra_labels": remove_extra_labels,
    }
    return train_supervised(train_data_path=train_data_path, val_data_path=val_data_path,
                            additional_data_paths=additional_data_paths, hyperparams=hypers_new,
                            preprocessing_function=preprocessing_function,
                            remove_extra_labels=remove_extra_labels, log_dir=log_dir,
                            use_gpu=use_gpu, verbose=verbose)
def send(self, msg):
    """Chain *msg* onto the blockchain and log the hash transition.

    Hashes the message, combines the digest with the latest block hash
    (falling back to the genesis hash when the chain is empty), appends
    the combined hash to the chain, and writes a small log record to
    ``self.outfile``. Always returns True.
    """
    digest = utils.hash_function(str(msg))
    prev_hash = self.blockchain.get_latest_block_hash()
    if prev_hash is None:
        # Empty chain: anchor the first block to the genesis hash.
        prev_hash = constants.GENESIS_HASH
    new_hash = utils.hash_together(digest, prev_hash)
    self.blockchain.add(new_hash)
    for record in ("previous hash is " + prev_hash + "\n",
                   "current hash is " + new_hash + "\n",
                   "________________________\n"):
        self.outfile.write(record)
    self.outfile.flush()
    return True
def check_sr(sr, sc):
    """Verify the server's response hash against the locally computed value.

    ``sr`` arrives over the network (untrusted); the original ``==``
    comparison short-circuits on the first differing character and can
    leak match-prefix length through timing. ``hmac.compare_digest``
    compares in constant time. Relies on ``cc`` and ``secret`` from the
    enclosing scope.
    """
    import hmac  # local import keeps the snippet self-contained
    expected = hash_function(sc + cc + secret)
    return hmac.compare_digest(sr, expected)
# client asking the server for authentication sock.send('AUTH') # client verifies the expected value of 'sr' def check_sr(sr, sc): return sr == hash_function(sc + cc + secret) while True: data = sock.recv(size) # server challenge in responce to the 'AUTH' request if 'SC.' in data: sc = data[3:] cr = hash_function(cc + sc + secret) # client sends 'cr' and 'cc' to the server sock.send('CR.' + cr + 'CC.' + cc) # server sends 'sr' if 'SR.' in data: sr = data[3:] if check_sr(sr, sc): print 'TRUE' else: print 'FALSE' # client receive an error response if 'ERROR' in data:
def cross_validate(path_to_df, text_field, label_field, n_folds=5, preprocessing_function=None,
                   additional_fields_and_preps=None, additional_data_paths=None, hyperparams=None,
                   report_top_k=True, log_dir="./", use_gpu=False, return_models=False, seed=17,
                   verbose=False, remove_extra_labels=True):
    """
    Run n-fold cross-validation, training one model per fold.

    :param path_to_df: str, path to csv or parquet file
    :param text_field: str, column of the dataframe in which is the text that should be classified
    :param label_field: str, column of the dataframe in which is the label of the corresponding text
    :param n_folds: int, number of folds
    :param preprocessing_function: function, function to apply on text_field column
    :param additional_fields_and_preps: dict. Dictionary in the following format
        {field_name1: preprocessing_function1, field_name2: preprocessing_function2}
        to enable custom preprocessing for different fields
    :param additional_data_paths: list of str, paths of fasttext format additional data to concat
        with train file
    :param hyperparams: dict, all hyperparams for train_supervised
    :param report_top_k: bool. If True will return top k scores, otherwise top 1 scores
    :param log_dir: str, directory to save the training files and the model
    :param use_gpu: bool, use gpu for training
    :param return_models: bool. If True will return tuple (scores, models)
    :param seed: int
    :param verbose: bool.
    :param remove_extra_labels: bool, remove datapoints with labels which appear in
        additional_data_paths but not in train_data_path
    :return: list. The scores for each split (or (scores, models) if return_models)
    """
    # Fix for mutable default arguments: the old {} / [] defaults were shared
    # across calls; use None sentinels and create fresh containers per call.
    additional_fields_and_preps = {} if additional_fields_and_preps is None else additional_fields_and_preps
    additional_data_paths = [] if additional_data_paths is None else additional_data_paths
    hyperparams = {} if hyperparams is None else hyperparams

    models, scores = [], []
    if path_to_df.endswith("parquet"):
        df = pd.read_parquet(path_to_df)
    else:
        df = pd.read_csv(path_to_df)
    # Fold each extra field's (preprocessed) text into the main text column.
    for added_field, prep_f in additional_fields_and_preps.items():
        if df[added_field].dtype != "object":
            df[added_field] = df[added_field].astype(str)
        if prep_f:
            df[added_field] = df[added_field].map(prep_f)
        df[text_field] = df[text_field] + " " + df[added_field]
    for fold_number, val_mask in enumerate(split_list(len(df), n_folds, seed)):
        train_data_path, val_data_path = preprocess_and_save(
            df, val_mask, text_field, label_field, preprocessing_function,
            additional_fields_and_preps, "./tmp_txt/", "_split{}".format(fold_number),
            verbose, [])
        if verbose:
            print("train path {}".format(train_data_path))
            print("val path {}".format(val_data_path))
        hypers_new = hyperparams.copy()
        # The result dir name encodes the preprocessing function (and any
        # extra fields) so runs with different preprocessing do not collide.
        if additional_fields_and_preps:
            hypers_new["result_dir"] = os.path.join(
                log_dir, "{}_{}".format(hash_function(preprocessing_function),
                                        "_".join(additional_fields_and_preps.keys())))
        else:
            hypers_new["result_dir"] = os.path.join(log_dir, hash_function(preprocessing_function))
        hypers_new["use_gpu"] = int(use_gpu)
        hypers_new["split_and_train_params"] = {
            "df_path": path_to_df,
            "additional_fields_and_preps": additional_fields_and_preps,
            "remove_extra_labels": remove_extra_labels,
        }
        model = train_supervised(train_data_path=train_data_path, val_data_path=val_data_path,
                                 additional_data_paths=additional_data_paths,
                                 hyperparams=hypers_new,
                                 preprocessing_function=preprocessing_function,
                                 remove_extra_labels=remove_extra_labels, log_dir=log_dir,
                                 use_gpu=use_gpu, verbose=verbose)
        if report_top_k:
            scores.append(model.top_k_accuracy)
        else:
            scores.append(model.top_1_accuracy)
        if return_models:
            models.append(model)
        # Free per-fold model memory before the next training run.
        del model
        gc.collect()
    if return_models:
        return scores, models
    return scores
def emit_intermediate(self, key, value):
    """Append a ``key:value`` record to the reducer partition chosen for *key*.

    The partition index is computed by ``hash_function(key, self.n_reducers)``
    and the record is appended under the ``intermediate_<partition>`` store
    key through the filesystem client.
    """
    partition = hash_function(key, self.n_reducers)
    self.fs_client.append(
        'intermediate_' + str(partition),
        str(key) + ':' + str(value))
def check_cr(cr, cc):
    """Verify the client's response hash against the locally computed value.

    ``cr`` arrives over the network (untrusted); the original ``==``
    comparison short-circuits on the first differing character and can
    leak match-prefix length through timing. ``hmac.compare_digest``
    compares in constant time. Relies on ``sc`` and ``secret`` from the
    enclosing scope.
    """
    import hmac  # local import keeps the snippet self-contained
    expected = hash_function(cc + sc + secret)
    return hmac.compare_digest(cr, expected)
# NOTE(review): Python 2 snippet (print statement) — server side of a
# challenge-response authentication. Requires `connection`, `sc` (server
# challenge), `secret`, `check_cr` and `hash_function` in scope.
try:
    while True:
        data = connection.recv(1024)
        print data, len(data)
        # Handshake start: client asks for auth, reply with our challenge.
        if data == 'AUTH':
            connection.send('SC.' + sc)
        # Client response is 'CR.<cr>CC.<cc>' — split on the 'CC.' marker.
        if 'CR.' in data:
            try:
                cc_index = data.index('CC.')
            except (ValueError):
                # Malformed message: missing 'CC.' marker.
                connection.send('ERROR')
                break
            cr = data[3:cc_index]
            cc = data[(cc_index + 3):]
            # server verified 'cr'
            if check_cr(cr, cc):
                # Prove knowledge of the secret back to the client.
                sr = hash_function(sc + cc + secret)
                connection.send('SR.' + sr)
            else:
                connection.send('ERROR')
                break
finally:
    connection.close()
# NOTE(review): near-duplicate of the server auth loop above (only whitespace
# inside the slice differs in the original) — consider deduplicating.
# Python 2 snippet; requires `connection`, `sc`, `secret`, `check_cr` and
# `hash_function` in scope.
try:
    while True:
        data = connection.recv(1024)
        print data, len(data)
        # Handshake start: client asks for auth, reply with our challenge.
        if data == 'AUTH':
            connection.send('SC.' + sc)
        # Client response is 'CR.<cr>CC.<cc>' — split on the 'CC.' marker.
        if 'CR.' in data:
            try:
                cc_index = data.index('CC.')
            except (ValueError):
                # Malformed message: missing 'CC.' marker.
                connection.send('ERROR')
                break
            cr = data[3:cc_index]
            cc = data[(cc_index+3):]
            # server verified 'cr'
            if check_cr(cr, cc):
                sr = hash_function(sc + cc + secret)
                connection.send('SR.' + sr)
            else:
                connection.send('ERROR')
                break
finally:
    connection.close()
sock.connect((host,port)) # client asking the server for authentication sock.send('AUTH') # client verifies the expected value of 'sr' def check_sr(sr, sc): return sr == hash_function(sc + cc + secret) while True: data = sock.recv(size) # server challenge in responce to the 'AUTH' request if 'SC.' in data: sc = data[3:] cr = hash_function(cc + sc + secret) # client sends 'cr' and 'cc' to the server sock.send('CR.' + cr + 'CC.' + cc) # server sends 'sr' if 'SR.' in data: sr = data[3:] if check_sr(sr, sc): print 'TRUE' print 'Authenticated Successfully' #Popen([executable, 'python hybrid_attr_iden_main.py'], creationflags=CREATE_NEW_CONSOLE) subprocess.call('python hybrid_attr_iden_main.py', shell=True) else: print 'FALSE'
# NOTE(review): the 'as utils' alias is redundant — 'import utils' suffices.
import utils as utils

# Genesis hash: derived by hashing the literal string '0'; used as the
# previous-hash anchor when the chain has no blocks yet.
GENESIS_HASH = utils.hash_function('0')