Esempio n. 1
0
def get_info_from_EndNote(file_url, return_source=False):
    """Populate the Publication with information from its profile.

    Downloads the EndNote export at *file_url*, parses it and normalizes
    the "pages" field into numeric start/end/count values when possible.

    Args:
        file_url: URL of the EndNote file to download.
        return_source: when True, include the raw EndNote text in the
            result under the "EndNote" key.

    Returns:
        dict of parsed EndNote fields, or None when the file could not be
        downloaded or parsed.
    """
    # The original retry loop could only ever run once (result was set to
    # None on failure, ending the loop), so a single fetch is equivalent.
    endnote_file = utils.get_text_data(file_url)
    if endnote_file is None:
        logger.debug("Download empty EndNote file.")
        return None
    endnote_file = endnote_file.replace("\r", "")
    logger.debug("EndNote file:\n%s", endnote_file)
    EndNote_info = EndNote_parsing(endnote_file)
    if not EndNote_info:
        return None
    if "pages" in EndNote_info:
        try:
            pages = EndNote_info["pages"].split("-")
            if len(pages) == 2:
                # "12-34" style range: record both bounds and derive the
                # page count from them.
                re_st_page = re.search(r"[0-9]+", pages[0].strip())
                re_end_page = re.search(r"[0-9]+", pages[1].strip())
                if re_st_page:
                    EndNote_info["start_page"] = int(re_st_page.group(0))
                if re_end_page:
                    EndNote_info["end_page"] = int(re_end_page.group(0))
                if re_st_page and re_end_page:
                    EndNote_info["pages"] = abs(EndNote_info["end_page"] -
                                                EndNote_info["start_page"] + 1)
            else:
                # Single value: keep just the numeric page count.
                re_st_page = re.search(r"[0-9]+", EndNote_info["pages"])
                EndNote_info["pages"] = int(re_st_page.group(0))
        except (AttributeError, TypeError, ValueError):
            # "pages" was not a string, or no digits were found
            # (re.search returned None).
            logger.warning("Can't eval count of pages for paper.")
            try:
                EndNote_info["pages"] = int(EndNote_info["pages"])
            except (TypeError, ValueError):
                EndNote_info["pages"] = None
    if return_source:
        EndNote_info.update({"EndNote": endnote_file})
    return EndNote_info
Esempio n. 2
0
def train_text():
    """Train a bidirectional RNN text classifier and save it as HDF5."""
    x_train, x_valid, x_test, y_train, y_valid, y_test = utils.get_text_data()
    embedding = embedding_utils.Embedding()

    # Build the model factory with the fixed hyperparameters used here.
    models = model.Text_classification_models(
        units=128,
        dropout=0.2,
        recurrent_dropout=0.2,
        num_classes=len(utils.text_label_list))

    classifier = models.bi_rnn()
    classifier.fit(x_train, y_train, epochs=hm_epoch, batch_size=hm_batch)
    classifier.evaluate(x_test, y_test, batch_size=hm_batch, verbose=0)

    print('Saving model...')
    classifier.save(model_path + '/rnn_model.h5')
Esempio n. 3
0
# Checkpoint parameters
tf.flags.DEFINE_integer("num_checkpoints", 5,
                        "Number of checkpoints to store (default: 5)")
# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True,
                        "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False,
                        "Log placement of ops on devices")

# NOTE(review): _parse_flags() and the __flags attribute are private
# TF 1.x internals; this dump of all flag values breaks on newer TF
# releases — confirm the pinned TensorFlow version.
FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")

# Load the train/test splits and the fitted vocabulary processor.
X_train, Y_train, X_test, Y_true, vocab_processor = utils.get_text_data()

# print("fitting/saving")
# clf = Pipeline([ ("word2vec vectorizer", TfidfEmbeddingVectorizer(w2v)),
#                  ("logistic regression", linear_model.LogisticRegression())])

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = TextCNN(sequence_length=X_train.shape[1],
                      num_classes=2,
                      vocab_size=len(vocab_processor.vocabulary_),
                      embedding_size=FLAGS.embedding_dim,
Esempio n. 4
0
# Smoke test: run the current model on two hard-coded English sentences.
# NOTE(review): tokenizer_en/tokenizer_vi suggest an EN→VI translation
# pipeline — confirm against the tokenizer modules.
s = ['But lets face it: At the core of this line of thinking isnt safety -- its sex',
     '--These are parts of their cars.']

# test_len = 10
max_generated_len = 10

x = tokenizer_en.tokenize(s)
print(x)
print(tokenizer_vi.merge([i for i in model.predict(x, max_len=max_generated_len)]))


# AdamW optimizer with per-step exponential decay (lr × 0.82 each step).
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001,)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.82)


# Load the parallel corpus splits and tokenize each language side.
train_en_text, train_vi_text, valid_en_text, valid_vi_text, test_en_text, test_vi_text = get_text_data()
train_en, valid_en, test_en = [tokenizer_en.tokenize(i) for i in [train_en_text, valid_en_text, test_en_text]]
train_vi, valid_vi, test_vi = [tokenizer_vi.tokenize(i) for i in [train_vi_text, valid_vi_text, test_vi_text]]


def eval(valid_en=valid_en, valid_vi=valid_vi, full_detail=False, confusion=False):
    """Evaluate the model on the validation split.

    NOTE(review): the body is truncated in this excerpt — only the setup
    (accumulators, sample sentences, tokenization) is visible. The name
    `eval` also shadows the builtin; consider renaming (e.g. `evaluate`)
    once all call sites are visible.
    """
    y_true = []
    y_pred = []
    total_loss = []
    batch_size = config.batch_size

    s = ['But lets face it: At the core of this line of thinking isnt safety -- its sex',
         'Process finished with exit code 0']
    max_generated_len = 20

    x = tokenizer_en.tokenize(s)
Esempio n. 5
0
#
# Main script to actually run cipher decryption
#

from utils import get_args, get_text_data
from substitution import Substitution
from vigenere import Vigenere
from caesar import Caesar

# Parse CLI arguments, then read the file to encrypt or decrypt.
args = get_args()
file_data = get_text_data(args['FILENAME'])
text = file_data

if args['CIPHER'] == 'SUBSTITUTION':
    substitution = Substitution()

    if args['SHOULD_ENCRYPT']:
        key = args['ENCRYPTION_KEY']
        encrypted = substitution.encrypt(text, key)
        print(encrypted)

    elif args['SHOULD_DECRYPT']:
        # Decryption takes no key here — presumably the key is recovered
        # by Substitution.decrypt itself; confirm in substitution.py.
        decrypted = substitution.decrypt(text)
        print(decrypted)

elif args['CIPHER'] == 'VIGENERE':
    vigenere = Vigenere()

    if args['SHOULD_ENCRYPT']:
        key = args['ENCRYPTION_KEY']
        # NOTE(review): excerpt ends here; the result is presumably
        # printed as in the SUBSTITUTION branch above.
        encrypted = vigenere.encrypt(text, key)