import re

# `utils`, `logger`, and `EndNote_parsing` are assumed to be defined at module level.


def get_info_from_EndNote(file_url, return_source=False):
    """Populate the Publication with information parsed from its EndNote profile."""
    result = True
    EndNode_file = None
    while result and EndNode_file is None:
        EndNode_file = utils.get_text_data(file_url)
        if EndNode_file is None:
            result = None
            # while result is None:
            #     result = input('Could not load EndNote file from scholar. Try again? [Y/N]').lower()
            #     if result == "y":
            #         result = True
            #     elif result == "n":
            #         result = False
    if EndNode_file is None:
        logger.debug("Downloaded an empty EndNote file.")
        return None
    EndNode_file = EndNode_file.replace("\r", "")
    logger.debug("EndNote file:\n%s" % EndNode_file)
    EndNote_info = EndNote_parsing(EndNode_file)
    if not EndNote_info:
        return None
    if "pages" in EndNote_info:
        try:
            pages = EndNote_info["pages"].split("-")
            if len(pages) == 2:
                start_page = pages[0].strip()
                end_page = pages[1].strip()
                re_st_page = re.search("[0-9]+", start_page)
                re_end_page = re.search("[0-9]+", end_page)
                if re_st_page:
                    EndNote_info["start_page"] = int(re_st_page.group(0))
                if re_end_page:
                    EndNote_info["end_page"] = int(re_end_page.group(0))
                if re_st_page and re_end_page:
                    # Inclusive page count of the start-end range.
                    EndNote_info["pages"] = abs(
                        EndNote_info["end_page"] - EndNote_info["start_page"]) + 1
            else:
                re_st_page = re.search("[0-9]+", EndNote_info["pages"])
                EndNote_info["pages"] = int(re_st_page.group(0))
        except Exception:
            logger.warning("Can't determine the page count for the paper.")
            try:
                EndNote_info["pages"] = int(EndNote_info["pages"])
            except (TypeError, ValueError):
                EndNote_info["pages"] = None
    if return_source:
        EndNote_info.update({"EndNote": EndNode_file})
    return EndNote_info
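# EndNote_parsing() is defined elsewhere in the project. As a hedged
# illustration only: a minimal sketch of a parser for the EndNote (.enw)
# export format, where each line starts with a %-tag such as %T (title),
# %A (author), %J (journal), %D (year), or %P (pages). The tag map and
# function name below are assumptions, not the project's actual code.
_ENW_TAGS = {"%T": "title", "%A": "author", "%J": "journal",
             "%D": "year", "%P": "pages"}


def endnote_parsing_sketch(enw_text):
    """Collect the %-tagged fields of an .enw record into a dict."""
    info = {}
    for line in enw_text.splitlines():
        if line.startswith("%") and len(line) > 2:
            tag, value = line[:2], line[2:].strip()
            key = _ENW_TAGS.get(tag)
            if key is None:
                continue
            # Repeated tags (e.g. several %A author lines) are joined.
            info[key] = value if key not in info else info[key] + "; " + value
    return info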
# `hm_epoch`, `hm_batch`, `model_path`, `utils`, `model`, and
# `embedding_utils` are assumed to be defined at module level.
def train_text():
    x_train, x_valid, x_test, y_train, y_valid, y_test = utils.get_text_data()
    embedding = embedding_utils.Embedding()
    rnn_units = 128
    dropout = 0.2
    recurrent_dropout = 0.2
    num_classes = len(utils.text_label_list)
    models = model.Text_classification_models(
        units=rnn_units,
        dropout=dropout,
        recurrent_dropout=recurrent_dropout,
        num_classes=num_classes)
    rnn_model = models.bi_rnn()
    # Monitor the held-out split loaded above during training.
    rnn_model.fit(x_train, y_train, epochs=hm_epoch, batch_size=hm_batch,
                  validation_data=(x_valid, y_valid))
    score = rnn_model.evaluate(x_test, y_test, batch_size=hm_batch, verbose=0)
    print('Test score: {}'.format(score))
    print('Saving model...')
    rnn_model.save(model_path + '/rnn_model.h5')
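# Text_classification_models.bi_rnn() is defined elsewhere in the project.
# A hedged sketch under the assumption that it builds a standard Keras
# bidirectional-LSTM classifier over integer-encoded sequences; the layer
# sizes and names here are illustrative, not the project's actual code.
from tensorflow.keras import layers, models as keras_models


def bi_rnn_sketch(vocab_size, units=128, dropout=0.2,
                  recurrent_dropout=0.2, num_classes=2):
    m = keras_models.Sequential([
        layers.Embedding(vocab_size, 128),
        layers.Bidirectional(layers.LSTM(units, dropout=dropout,
                                         recurrent_dropout=recurrent_dropout)),
        layers.Dense(num_classes, activation='softmax'),
    ])
    m.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])
    return m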
tf.flags.DEFINE_integer("num_checkpoints", 5, "Number of checkpoints to store (default: 5)")
# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

X_train, Y_train, X_test, Y_true, vocab_processor = utils.get_text_data()

# print("fitting/saving")
# clf = Pipeline([("word2vec vectorizer", TfidfEmbeddingVectorizer(w2v)),
#                 ("logistic regression", linear_model.LogisticRegression())])

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(
            sequence_length=X_train.shape[1],
            num_classes=2,
            vocab_size=len(vocab_processor.vocabulary_),
            embedding_size=FLAGS.embedding_dim,
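# The TextCNN constructor call breaks off above. As a hedged sketch only
# (kept as comments since the call is incomplete in this excerpt): TF 1.x
# scripts of this shape typically continue by wiring an optimizer to the
# model's loss and running minibatch steps. The attribute names cnn.loss,
# cnn.input_x, cnn.input_y, and cnn.dropout_keep_prob are assumptions
# about the TextCNN class, not confirmed by this excerpt.
#
#         global_step = tf.Variable(0, name="global_step", trainable=False)
#         train_op = tf.train.AdamOptimizer(1e-3).minimize(cnn.loss, global_step=global_step)
#         sess.run(tf.global_variables_initializer())
#
#         def train_step(x_batch, y_batch):
#             feed_dict = {cnn.input_x: x_batch,
#                          cnn.input_y: y_batch,
#                          cnn.dropout_keep_prob: 0.5}
#             _, step, loss = sess.run([train_op, global_step, cnn.loss], feed_dict)
#             print("step {}, loss {:g}".format(step, loss))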
s = ['But lets face it: At the core of this line of thinking isnt safety -- its sex',
     '--These are parts of their cars.']
# test_len = 10
max_generated_len = 10
x = tokenizer_en.tokenize(s)
print(x)
print(tokenizer_vi.merge(list(model.predict(x, max_len=max_generated_len))))

optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.82)

train_en_text, train_vi_text, valid_en_text, valid_vi_text, test_en_text, test_vi_text = get_text_data()
train_en, valid_en, test_en = [tokenizer_en.tokenize(i)
                               for i in [train_en_text, valid_en_text, test_en_text]]
train_vi, valid_vi, test_vi = [tokenizer_vi.tokenize(i)
                               for i in [train_vi_text, valid_vi_text, test_vi_text]]


def eval(valid_en=valid_en, valid_vi=valid_vi, full_detail=False, confusion=False):
    # NOTE: shadows the built-in eval().
    y_true = []
    y_pred = []
    total_loss = []
    batch_size = config.batch_size
    s = ['But lets face it: At the core of this line of thinking isnt safety -- its sex',
         'Process finished with exit code 0']
    max_generated_len = 20
    x = tokenizer_en.tokenize(s)
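# The body of eval() breaks off above. As a hedged sketch only: an
# evaluation loop of this shape typically batches the validation pairs,
# accumulates the loss, and averages it. `model` returning a loss from
# (src, tgt) is an assumption for illustration, not shown in this excerpt.
import torch


def eval_sketch(valid_en, valid_vi, batch_size):
    model.eval()
    losses = []
    with torch.no_grad():
        for i in range(0, len(valid_en), batch_size):
            src = valid_en[i:i + batch_size]
            tgt = valid_vi[i:i + batch_size]
            loss = model(src, tgt)  # assumed: forward pass returns the loss
            losses.append(loss.item())
    model.train()
    return sum(losses) / max(len(losses), 1)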
#
# Main script to actually run cipher decryption
#
from utils import get_args, get_text_data
from substitution import Substitution
from vigenere import Vigenere
from caesar import Caesar

args = get_args()
file_data = get_text_data(args['FILENAME'])
text = file_data

if args['CIPHER'] == 'SUBSTITUTION':
    substitution = Substitution()
    if args['SHOULD_ENCRYPT']:
        key = args['ENCRYPTION_KEY']
        encrypted = substitution.encrypt(text, key)
        print(encrypted)
    elif args['SHOULD_DECRYPT']:
        decrypted = substitution.decrypt(text)
        print(decrypted)
elif args['CIPHER'] == 'VIGENERE':
    vigenere = Vigenere()
    if args['SHOULD_ENCRYPT']:
        key = args['ENCRYPTION_KEY']
        encrypted = vigenere.encrypt(text, key)
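# The script is cut off above before the Vigenere print and the Caesar
# branch. For illustration, a minimal sketch of a shift cipher with the
# same encrypt/decrypt interface as the classes used above; this is an
# assumption about the shape of the project's Caesar class, not its
# actual implementation.
class CaesarSketch:
    def encrypt(self, text, key):
        shift = int(key) % 26
        out = []
        for ch in text:
            if ch.isalpha():
                base = ord('A') if ch.isupper() else ord('a')
                out.append(chr((ord(ch) - base + shift) % 26 + base))
            else:
                out.append(ch)
        return ''.join(out)

    def decrypt(self, text):
        # Without a key, return all 26 candidate shifts; scoring the most
        # English-like candidate is left to the caller.
        return [self.encrypt(text, 26 - k) for k in range(26)]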