Example #1
def test_create_string_values_parses_to_0s():
    data = "1,2,foo;"
    parser = Parser(data)

    assert parser.parsed_data == [[[1.0, 2.0, 0.0], [0, 0, 0]]]

    data = "1,2,foo|4,bar,6;"
    parser = Parser(data)

    assert parser.parsed_data == [[[1.0, 2.0, 0.0], [4.0, 0.0, 6.0]]]
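
For orientation, a minimal sketch of the parsing these assertions imply: samples are separated by ';', an optional '|' splits the user and gravity vectors, and ',' separates components; non-numeric values fall back to 0.0, and combined data gets a zero gravity vector. (This is a hedged reconstruction from the asserts; it does not reproduce the gravity separation visible in Example #17.)

def parse(data):
    # Convert one component to float; junk values ("foo", "bar") become 0.0.
    def to_float(value):
        try:
            return float(value)
        except ValueError:
            return 0.0

    samples = []
    for sample in data.rstrip(';').split(';'):
        parts = sample.split('|')
        user = [to_float(v) for v in parts[0].split(',')]
        gravity = ([to_float(v) for v in parts[1].split(',')]
                   if len(parts) > 1 else [0, 0, 0])
        samples.append([user, gravity])
    return samples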
Example #2
def test_Parser_clean():
    """Cleaning function test"""

    question = 'Ou se trouve le musée du Louvre?'
    parser = Parser(question)
    result = parser.clean()
    assert result == "musee du louvre"
Example #3
def test_remove_stop_words():
    """test to remove stop words with a new
    sentence"""

    sentence = "salut grandpy a quel endroit se trouve le musee du louvre"
    p = Parser(sentence)
    assert p.remove_stop_words(sentence) == "musee louvre"
Example #4
def test_create_combined_data():
    data = '0.123,-0.123,5;0.456,-0.789,0.111;-0.212,0.001,1;'
    parser = Parser(data)
    processor = Processor(parser.parsed_data)

    assert processor.dot_product_data == [0.0, 0.0, 0.0005219529804999682]
    assert processor.filtered_data == [0, 0, 4.753597533351234e-05]
Example #5
def test_create_separated_data():
    data = '0.028,-0.072,5|0.129,-0.945,-5;0,-0.07,0.06|0.123,-0.947,5;0.2,-1,2|0.1,-0.9,3;'
    parser = Parser(data)
    processor = Processor(parser.parsed_data)

    assert processor.dot_product_data == [-24.928348, 0.36629, 6.92]
    assert processor.filtered_data == [0, 0, -1.7004231121083724]
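
The dot_product_data values are the per-sample dot products of the user-acceleration and gravity vectors, e.g. 0.028 * 0.129 + (-0.072) * (-0.945) + 5 * (-5) = -24.928348 for the first sample. A minimal sketch, assuming parsed_data holds [user, gravity] pairs:

def dot_products(parsed_data):
    # Dot product of the user and gravity 3-vectors, per sample.
    return [sum(u * g for u, g in zip(user, gravity))
            for user, gravity in parsed_data]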
Example #6
def test_take_away_special_characters():
    """Test to remove special characters from a sentence"""

    p = Parser("Hello my name is Jon ,?;.:/!-*+%$€_£¤=@|°}]¨[(){'#~&²")
    assert p.remove_all_special_characters(
        "hello my name is Jon,?;.:/!-*+%$€_£¤=@|°}]¨[(){'#~&²"
    ) == "Hello my name is Jon                                 "
Example #7
def __init__(self):
    super(Downloader, self).__init__()
    self.courses = []
    self.parser = Parser()
    self.manager = Manager()
    self.course_queue = self.manager.Queue()
    self.lect_queue = self.manager.Queue()
Example #8
def test_remove_all_accents():
    """test to transform special characters into
    normal characters"""

    p = Parser("éèêëãàäâåîïìöôòõñûüÿ")
    assert p.remove_all_accents(
        "éèêëãàäâåîïìöôòõñûüÿ") == "eeeeaaaaaiiioooonuuy"
Example #9
def test_Parser_extract_place():
    """Place extraction function test"""

    question = 'ou se trouve le musee du louvre?'
    parser = Parser(question)
    result = parser._extract_place(question)
    assert result == " le musee du louvre"
Example #10
def test_Parser_delete_article():
    """Test article deletion function"""

    question = 'le musee du louvre?'
    parser = Parser(question)
    result = parser._delete_article(question)
    assert result == "musee du louvre?"
Example #11
def grandpy(user_text):
    """Application class instance"""

    data = {"status": False}

    parser = Parser(user_text)
    result = parser.clean()

    api_google = ApiGoogle()
    adress, coo = api_google.api_reading(result)

    if adress and coo:
        api_wiki = ApiWikipedia()
        page_id = api_wiki.api_get_page_id(**coo)

        if page_id:
            extract, url = api_wiki.api_get_extract(page_id)
            data = {
                "status": True,
                "question": user_text,
                "article": extract,
                "coords": coo,
                "url": url,
                "adress": adress,
                "response": "Voilà l'endroit demandé mon petit !"
            }

    if not data.get("status"):
        data = {
            "question": user_text,
            "status": False,
            "response": "Je n'ai pas la reponse à la question..!!"
        }

    return data
Example #12
def find_place_in_sentence(text):
    """ Returns the position of the place the usertext
    is looking for """
    parser = Parser(text)
    parser.capture_regular_expression(text)
    address_ask_by_user = parser.address
    return address_ask_by_user
Example #13
def main(cfg: DictConfig) -> None:
    "The entry point for parsing user-provided texts"

    assert cfg.model_path is not None, "Need to specify model_path for testing."
    assert cfg.input is not None
    assert cfg.language in ("english", "chinese")
    log.info("\n" + OmegaConf.to_yaml(cfg))

    # load the model checkpoint
    model_path = hydra.utils.to_absolute_path(cfg.model_path)
    log.info("Loading the model from %s" % model_path)
    checkpoint = load_model(model_path)
    restore_hyperparams(checkpoint["cfg"], cfg)
    vocabs = checkpoint["vocabs"]

    model = Parser(vocabs, cfg)
    model.load_state_dict(checkpoint["model_state"])
    device, _ = get_device()
    model.to(device)
    log.info("\n" + str(model))
    log.info("#parameters = %d" % sum([p.numel() for p in model.parameters()]))

    input_file = hydra.utils.to_absolute_path(cfg.input)
    ds = UserProvidedTexts(input_file, cfg.language, vocabs, cfg.encoder)
    loader = DataLoader(
        ds,
        batch_size=cfg.eval_batch_size,
        collate_fn=form_batch,
        num_workers=cfg.num_workers,
        pin_memory=torch.cuda.is_available(),
    )

    env = Environment(loader, model.encoder, subbatch_max_tokens=9999999)
    state = env.reset()
    oup = (sys.stdout if cfg.output is None else open(
        hydra.utils.to_absolute_path(cfg.output), "wt"))
    time_start = time()

    with torch.no_grad():  # type: ignore
        while True:
            with torch.cuda.amp.autocast(cfg.amp):  # type: ignore
                actions, _ = model(state)
            state, done = env.step(actions)
            if done:
                for tree in env.pred_trees:
                    assert tree is not None
                    print(tree.linearize(), file=oup)
                # pred_trees.extend(env.pred_trees)
                # load the next batch
                try:
                    with torch.cuda.amp.autocast(cfg.amp):  # type: ignore
                        state = env.reset()
                except EpochEnd:
                    # no next batch available (complete)
                    log.info("Time elapsed: %f" % (time() - time_start))
                    break

    if cfg.output is not None:
        log.info("Parse trees saved to %s" % cfg.output)
Example #14
def test_create_separated_data():
    data = '0.028,-0.072,5|0.129,-0.945,-5;0,-0.07,0.06|0.123,-0.947,5;0.2,-1,2|0.1,-0.9,3;'
    parser = Parser(data)

    assert parser.parsed_data == [[[0.028, -0.072, 5.0], [0.129, -0.945,
                                                          -5.0]],
                                  [[0.0, -0.07, 0.06], [0.123, -0.947, 5.0]],
                                  [[0.2, -1.0, 2.0], [0.1, -0.9, 3.0]]]
Example #15
def test_capture_regular_expression():
    """Test to capture the location and the
    words  that introduces a question about a
    place"""
    
    sentence = "Salut grandpy! Comment s'est passé ta soirée avec Grandma hier soir? Au fait, pendant que j'y pense, pourrais-tu m'indiquer où se trouve le musée d'art et d'histoire de Fribourg, s'il te plaît?"
    p = Parser(sentence)
    assert p.capture_regular_expression(
        sentence) == " où se trouve le musée d'art et d'histoire de Fribourg"
Example #16
def test_create_non_zero_data():
    user = User('female', 167, 70)
    trial = Trial('walk 1', 100, 18)
    parser = Parser(open('test/data/female-167-70_walk2-100-10.txt').read())
    processor = Processor(parser.parsed_data)
    analyzer = Analyzer(processor.filtered_data, user, trial)

    assert analyzer.steps == 10
    assert analyzer.delta == -8
    assert analyzer.distance == 700
    assert analyzer.time == 1037 / 100
Example #17
def test_create_combined_data():
    data = '0.123,-0.123,5;0.456,-0.789,0.111;-0.212,0.001,1;'
    parser = Parser(data)

    assert parser.parsed_data == [
        [[0.123, -0.123, 5.0], [0, 0, 0]], [[0.456, -0.789, 0.111], [0, 0, 0]],
        [[-0.2120710948533322, 0.0011468544965549535, 0.9994625125426089],
         [
             7.109485333219216e-05, -0.00014685449655495343,
             0.0005374874573911294
         ]]
    ]
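
A quick sanity check on the third sample: the low-pass gravity component and the user component sum back to the raw reading (values copied from the assert above):

gravity = [-0.2120710948533322, 0.0011468544965549535, 0.9994625125426089]
user = [7.109485333219216e-05, -0.00014685449655495343, 0.0005374874573911294]
total = [g + u for g, u in zip(gravity, user)]
assert all(abs(t - r) < 1e-9 for t, r in zip(total, [-0.212, 0.001, 1.0]))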
Example #18
    def post():
        """Post endpoint for registering users into the database.

        Takes params username and password to put into database. Usernames
        cannot be duplicated and will return json message and 400 error if tried.
        On successful user creation will return json message and 201 code"""
        parser = Parser()
        parser.required_fields('username', 'password')
        data = parser.parse_args()
        if UserModel.find_by_username(data['username']):
            return {'message': 'User already exists'}, 400
        UserModel(**data).save_to_db()
        return {'message': 'User created successfully'}, 201
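
Both REST examples lean on a small Parser helper. A plausible shape for it on top of flask_restful's reqparse (required_fields is inferred from the call sites and is hypothetical):

from flask_restful import reqparse

class Parser:
    def __init__(self):
        self._parser = reqparse.RequestParser()

    def required_fields(self, *names):
        # Register every listed field as a required request argument.
        for name in names:
            self._parser.add_argument(name, required=True,
                                      help='%s cannot be blank' % name)

    def parse_args(self):
        return self._parser.parse_args()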
Example #19
    def post(self):
        """Post endpoint for adding requests into the database.

        Headers: {Authorization: JWT jwt_token, ContentType: application/json}
        Body must be json with priority, target_date, product_area, client_name,
        title, description fields. Database must have matching client_name or
        will return json message 400 error. If error occurs while inserting into
        database will return json message and 500 error. On successful insert
        into database returns json of request and 201 code."""
        parser = Parser()
        parser.required_fields(
            'priority',
            'target_date',
            'product_area',
            'client_name',
            'title',
            'description')
        data = parser.parse_args()
        if not ClientModel.select(data['client_name']):
            return {
                'message': 'Client does not exist'}, 400
        request = RequestModel(**data)
        client_name = data['client_name']
        priority = data['priority']
        update_list = []
        try:
            conflict = RequestModel.select_same_priority(client_name, priority)
            while conflict:
                update_list.append(conflict)
                priority += 1
                conflict = RequestModel.select_same_priority(
                    client_name, priority)
            request.save_to_db()
            for req in update_list:
                req.priority += 1
                req.save_to_db()
        except Exception:
            return {'message': 'Something went wrong'}, 500
        return request.json(), 201
Example #20
def ajax():
    """"""
    data = {"status": False}
    user_text = request.form["userText"]

    logger.debug("Question posée :" + user_text)

    parser = Parser(user_text)
    result = parser.clean()

    api_google = ApiGoogle()
    adress, coo = api_google.api_reading(result)

    if adress and coo:
        api_wiki = ApiWikipedia()
        page_id = api_wiki.api_get_page_id(**coo)

        if page_id:
            extract, url = api_wiki.api_get_extract(page_id)
            data = {
                "status": True,
                "question": user_text,
                "article": extract,
                "coords": coo,
                "url": url,
                "adress": adress,
                "response": "Voilà l'endroit demandé mon petit !"
            }

    if not data.get("status"):
        data = {
            "question": user_text,
            "status": False,
            "response": "Je n'ai pas la reponse à la question..!!"
        }

    return jsonify(data)
Example #21
def test_remove_stop_words():
    """test to remove stop words"""

    sentence = "salut grandpy je souhaite savoir ou se situe la tour montparnasse"
    p = Parser(sentence)
    assert p.remove_stop_words(sentence) == "tour montparnasse"
Example #22
def test_get_lower_cases():
    """test to transform every letter
    in to lower"""
    p = Parser("Hello")
    assert p.get_lower_cases() == "hello"
Example #23
def test_remove_spaces():
    """test to remove spaces in a sentence"""
    p = Parser("  Hello  ")
    assert p.remove_spaces("  Hello l'o  ") == "Hello lo"
Example #24
    Build a single graph for cross-lingual or cross-domain training;
    build a data collector (managing vocabs, embeddings and datasets) and
    a model builder (managing the session and subgraph) for each treebank
    or language; the Global-Graph-Manager (base_model) manages each session.

    """
    if gconfig.train and gconfig.data_model == 'seperate-data_seperate-model':
        models = []
        data_collectors = []
        for config in config_list:
            # set data loader for each treebank, load vocabs, embeddings and datasets
            data_loader = DataCollector(config, trainFLAG=True, devFLAG=True)
            data_collectors.append(data_loader)
            # build model
            if gconfig.task == 'parse':
                model = Parser(config, data_loader)
            elif gconfig.task == 'pos':
                model = Tagger(config, data_loader)
            else:
                raise TypeError("Do not support %s!" % (config.task))
            model.build()
            models.append(model)
        # global trainer
        trainer = BaseModel(models)
        trainer.init_session()
        trainer.train_models()
    """ Testing """
    if gconfig.test and gconfig.data_model == 'seperate-data_seperate-model':
        models = []
        data_collectors = []
        for config in config_list:
Example #25
def test_create_parser():
    """test to create Parser class object"""
    p = Parser("")
    assert p.sentence == ""
Example #26
def feed(self):
    self.parser = Parser(self.data)
    self.processor = Processor(self.parser.parsed_data)
    self.analyzer = Analyzer(self.processor.filtered_data, self.user,
                             self.trial)
Example #27
def main(cfg: DictConfig) -> None:
    "The entry point for testing"

    assert cfg.model_path is not None, "Need to specify model_path for testing."
    log.info("\n" + OmegaConf.to_yaml(cfg))

    # restore the hyperparameters used for training
    model_path = hydra.utils.to_absolute_path(cfg.model_path)
    log.info("Loading the model from %s" % model_path)
    checkpoint = load_model(model_path)
    restore_hyperparams(checkpoint["cfg"], cfg)

    # create dataloaders for validation and testing
    vocabs = checkpoint["vocabs"]
    loader_val, _ = create_dataloader(
        hydra.utils.to_absolute_path(cfg.path_val),
        "val",
        cfg.encoder,
        vocabs,
        cfg.eval_batch_size,
        cfg.num_workers,
    )
    loader_test, _ = create_dataloader(
        hydra.utils.to_absolute_path(cfg.path_test),
        "test",
        cfg.encoder,
        vocabs,
        cfg.eval_batch_size,
        cfg.num_workers,
    )

    # restore the trained model checkpoint
    model = Parser(vocabs, cfg)
    model.load_state_dict(checkpoint["model_state"])
    device, _ = get_device()
    model.to(device)
    log.info("\n" + str(model))
    log.info("#parameters = %d" % sum([p.numel() for p in model.parameters()]))

    # validation
    log.info("Validating..")
    f1_score = validate(loader_val, model, cfg)
    log.info(
        "Validation F1 score: %.03f, Exact match: %.03f, Precision: %.03f, Recall: %.03f"
        % (
            f1_score.fscore,
            f1_score.complete_match,
            f1_score.precision,
            f1_score.recall,
        ))

    # testing
    log.info("Testing..")
    if cfg.beam_size > 1:
        log.info("Performing beam search..")
        f1_score = beam_search(loader_test, model, cfg)
    else:
        log.info("Running without beam search..")
        f1_score = validate(loader_test, model, cfg)
    log.info(
        "Testing F1 score: %.03f, Exact match: %.03f, Precision: %.03f, Recall: %.03f"
        % (
            f1_score.fscore,
            f1_score.complete_match,
            f1_score.precision,
            f1_score.recall,
        ))
Example #28
def __init__(self):
    self.network_url = Network().network_url
    self.parser = Parser()
    self.client = AsyncHTTPClient()
    self.logger = get_logger('http-bridge')
Example #29
def train_val(cfg: DictConfig) -> None:

    # create dataloaders for training and validation
    loader_train, vocabs = create_dataloader(
        hydra.utils.to_absolute_path(cfg.path_train),
        "train",
        cfg.encoder,
        None,
        cfg.batch_size,
        cfg.num_workers,
    )
    assert vocabs is not None
    loader_val, _ = create_dataloader(
        hydra.utils.to_absolute_path(cfg.path_val),
        "val",
        cfg.encoder,
        vocabs,
        cfg.eval_batch_size,
        cfg.num_workers,
    )

    # create the model
    model = Parser(vocabs, cfg)
    device, _ = get_device()
    model.to(device)
    log.info("\n" + str(model))
    log.info("#parameters = %d" % count_params(model))

    # create the optimizer
    optimizer = torch.optim.RMSprop(
        model.parameters(),
        lr=cfg.learning_rate,
        weight_decay=cfg.weight_decay,
    )
    start_epoch = 0
    if cfg.resume is not None:  # resume training from a checkpoint
        checkpoint = load_model(cfg.resume)
        model.load_state_dict(checkpoint["model_state"])
        start_epoch = checkpoint["epoch"] + 1
        optimizer.load_state_dict(checkpoint["optimizer_state"])
        del checkpoint
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode="max",
        factor=0.5,
        patience=cfg.learning_rate_patience,
        cooldown=cfg.learning_rate_cooldown,
        verbose=True,
    )

    # start training and validation
    best_f1_score = -1.0
    num_iters = 0

    for epoch in range(start_epoch, cfg.num_epochs):
        log.info("Epoch #%d" % epoch)

        if not cfg.skip_training:
            log.info("Training..")
            num_iters, accuracy_train, loss_train = train(
                num_iters,
                loader_train,
                model,
                optimizer,
                vocabs["label"],
                cfg,
            )
            log.info("Action accuracy: %.03f, Loss: %.03f" %
                     (accuracy_train, loss_train))

        log.info("Validating..")
        f1_score_val = validate(loader_val, model, cfg)

        log.info(
            "Validation F1 score: %.03f, Exact match: %.03f, Precision: %.03f, Recall: %.03f"
            % (
                f1_score_val.fscore,
                f1_score_val.complete_match,
                f1_score_val.precision,
                f1_score_val.recall,
            ))

        if f1_score_val.fscore > best_f1_score:
            log.info("F1 score has improved")
            best_f1_score = f1_score_val.fscore

        scheduler.step(best_f1_score)

        save_checkpoint(
            "model_latest.pth",
            epoch,
            model,
            optimizer,
            f1_score_val.fscore,
            vocabs,
            cfg,
        )