Esempio n. 1
0
def test_remove_all_accents():
    """Check that ``remove_all_accents`` maps accented letters to their
    unaccented counterparts."""

    accented = "éèêëãàäâåîïìöôòõñûüÿ"
    expected = "eeeeaaaaaiiioooonuuy"

    parser = Parser(accented)
    assert parser.remove_all_accents(accented) == expected
Esempio n. 2
0
def test_Parser_delete_article():
    """Check that ``_delete_article`` drops the leading "le" of the
    question while the rest of the text is returned unchanged."""

    sentence = 'le musee du louvre?'
    stripped = Parser(sentence)._delete_article(sentence)
    assert stripped == "musee du louvre?"
Esempio n. 3
0
def test_take_away_special_characters():
    """Check the output of ``remove_all_special_characters`` for a sentence
    followed by a run of punctuation marks and symbols."""

    parser = Parser("Hello my name is Jon ,?;.:/!-*+%$€_£¤=@|°}]¨[(){'#~&²")
    cleaned = parser.remove_all_special_characters(
        "hello my name is Jon,?;.:/!-*+%$€_£¤=@|°}]¨[(){'#~&²")

    # NOTE(review): the expected text starts with "Hello" although the
    # argument starts with "hello" -- presumably the method works on the
    # sentence given to the constructor; confirm against Parser.
    expected = "Hello my name is Jon                                 "
    assert cleaned == expected
Esempio n. 4
0
def test_Parser_clean():
    """Check the output of ``clean`` for a complete question."""

    parser = Parser('Ou se trouve le musée du Louvre?')
    assert parser.clean() == "musee du louvre"
Esempio n. 5
0
def grandpy(user_text):
    """Build the answer payload for a user question.

    The question is cleaned with ``Parser.clean``, the cleaned text is
    handed to ``ApiGoogle.api_reading`` and, when that yields both an
    address and coordinates, ``ApiWikipedia`` is asked for a page id and
    then for the extract and url of that page.

    Returns:
        A dict. When every lookup produced a value, ``"status"`` is ``True``
        and the dict carries the question, article, coordinates, url,
        address and a response sentence. Otherwise ``"status"`` is ``False``
        and only the question and a fallback response are present.
    """

    failure = {
        "question": user_text,
        "status": False,
        "response": "Je n'ai pas la reponse à la question..!!"
    }

    cleaned = Parser(user_text).clean()
    adress, coo = ApiGoogle().api_reading(cleaned)
    if not (adress and coo):
        return failure

    api_wiki = ApiWikipedia()
    page_id = api_wiki.api_get_page_id(**coo)
    if not page_id:
        return failure

    extract, url = api_wiki.api_get_extract(page_id)
    return {
        "status": True,
        "question": user_text,
        "article": extract,
        "coords": coo,
        "url": url,
        "adress": adress,
        "response": "Voilà l'endroit demandé mon petit !"
    }
Esempio n. 6
0
def test_remove_stop_words():
    """Check the output of ``remove_stop_words`` for a full sentence."""

    sentence = "salut grandpy a quel endroit se trouve le musee du louvre"
    remaining = Parser(sentence).remove_stop_words(sentence)
    assert remaining == "musee louvre"
Esempio n. 7
0
def find_place_in_sentence(text):
    """Return the ``address`` attribute of a ``Parser`` built from *text*
    after ``capture_regular_expression`` has been run on it."""
    place_parser = Parser(text)
    # Called for its effect on the parser; the return value is not used here.
    place_parser.capture_regular_expression(text)
    return place_parser.address
Esempio n. 8
0
def test_Parser_extract_place():
    """Check the output of ``_extract_place`` for a "where is" question."""

    sentence = 'ou se trouve le musee du louvre?'
    place = Parser(sentence)._extract_place(sentence)
    # The expected value keeps the leading space.
    assert place == " le musee du louvre"
Esempio n. 9
0
def test_capture_regular_expression():
    """Check that ``capture_regular_expression`` returns the part of a long
    message that asks where a place is."""

    message = "Salut grandpy! Comment s'est passé ta soirée avec Grandma hier soir? Au fait, pendant que j'y pense, pourrais-tu m'indiquer où se trouve le musée d'art et d'histoire de Fribourg, s'il te plaît?"
    expected = " où se trouve le musée d'art et d'histoire de Fribourg"

    parser = Parser(message)
    assert parser.capture_regular_expression(message) == expected
Esempio n. 10
0
 def get(self):
     """Search for tweets, parse them and save the parsed ones.

     The polling interval is read from the ``interval`` request parameter
     and falls back to ``self.default_interval``. Tweets for which
     ``Parser.parse_tweet`` returns a falsy value are dropped before
     ``Tweet.multi_save`` is called.
     """
     interval = int(self.request.get('interval', self.default_interval))
     # search all tweets
     tweets = Poller(interval).search()
     # parse them, keeping only the truthy results
     parser = Parser()
     # A list is built explicitly: on Python 3 ``filter`` returns a lazy,
     # one-shot iterator, while ``multi_save`` is handed a batch of items.
     parsed_tweets = [
         parsed for parsed in map(parser.parse_tweet, tweets) if parsed
     ]
     # and finally write them to the datastore
     Tweet.multi_save(parsed_tweets)
Esempio n. 11
0
def test_create_string_values_parses_to_0s():
    """Check ``parsed_data`` when some fields of the input are not numeric:
    the expected values hold ``0.0`` in their place."""
    cases = [
        ("1,2,foo;", [[[1.0, 2.0, 0.0], [0, 0, 0]]]),
        ("1,2,foo|4,bar,6;", [[[1.0, 2.0, 0.0], [4.0, 0.0, 6.0]]]),
    ]

    for raw, expected in cases:
        assert Parser(raw).parsed_data == expected
Esempio n. 12
0
def validate(loader: torch.utils.data.DataLoader, model: Parser,
             cfg: DictConfig) -> FScore:  # type: ignore
    """Run validation/testing without beam search.

    Args:
        loader: data loader handed to ``Environment``.
        model: the parser; called on the environment state to obtain
            actions, and its ``encoder`` is handed to ``Environment``.
        cfg: configuration; ``cfg.amp`` and ``cfg.decoder`` are read here
            (``cfg.decoder`` must be ``"graph"`` or ``"sequence"``).

    Returns:
        The result of ``evalb`` over the ground-truth and predicted trees
        collected from every batch, computed once ``env.reset()`` raises
        ``EpochEnd``.
    """

    model.eval()
    # testing requires far less GPU memory than training
    # so there is no need to split a batch into multiple subbatches
    env = Environment(loader, model.encoder, subbatch_max_tokens=9999999)
    state = env.reset()

    pred_trees = []
    gt_trees = []
    time_start = time()

    with torch.no_grad():  # type: ignore
        # The only exit from this loop is the ``return`` in the ``EpochEnd``
        # handler at the bottom.
        while True:
            with torch.cuda.amp.autocast(cfg.amp):  # type: ignore
                actions, _ = model(state)

            if cfg.decoder == "graph":
                # actions for a single step
                state, done = env.step(actions)
                if not done:
                    # batch not finished yet: query the model again
                    continue
            else:
                assert cfg.decoder == "sequence"
                # actions for all steps
                for n_step in itertools.count():
                    # only sequences that still have an action at this step
                    # contribute to it
                    a_t = [
                        action_seq[n_step] for action_seq in actions
                        if len(action_seq) > n_step
                    ]
                    # the returned state is discarded; a fresh one comes
                    # from env.reset() below
                    _, done = env.step(a_t)
                    if done:
                        break

            # the current batch is done: collect its trees
            pred_trees.extend(env.pred_trees)
            gt_trees.extend(env.gt_trees)

            # load the next batch
            try:
                with torch.cuda.amp.autocast(cfg.amp):  # type: ignore
                    state = env.reset()
            except EpochEnd:
                # no next batch available (complete)
                f1_score = evalb(
                    hydra.utils.to_absolute_path("./EVALB"),
                    gt_trees,
                    pred_trees  # type: ignore
                )
                log.info("Time elapsed: %f" % (time() - time_start))
                return f1_score
Esempio n. 13
0
    def post():
        """Register a new user.

        The ``username`` and ``password`` fields are required. When a user
        with the same username already exists a JSON message is returned
        with status 400; otherwise the user is saved and a JSON message is
        returned with status 201.
        """
        arg_parser = Parser()
        arg_parser.required_fields('username', 'password')
        payload = arg_parser.parse_args()

        already_registered = UserModel.find_by_username(payload['username'])
        if already_registered:
            return {'message': 'User already exists'}, 400

        new_user = UserModel(**payload)
        new_user.save_to_db()
        return {'message': 'User created successfully'}, 201
Esempio n. 14
0
def test_create_separated_data():
    """Check ``dot_product_data`` and ``filtered_data`` of a ``Processor``
    fed with input whose samples contain two ``|``-separated triples."""
    raw = '0.028,-0.072,5|0.129,-0.945,-5;0,-0.07,0.06|0.123,-0.947,5;0.2,-1,2|0.1,-0.9,3;'
    processor = Processor(Parser(raw).parsed_data)

    expected_dot_product = [-24.928348, 0.36629, 6.92]
    expected_filtered = [0, 0, -1.7004231121083724]

    assert processor.dot_product_data == expected_dot_product
    assert processor.filtered_data == expected_filtered
Esempio n. 15
0
 def __init__(self):
     """Initialise the downloader: an empty course list, a ``Parser``, a
     ``Manager`` and two queues created through that manager."""
     super(Downloader, self).__init__()

     self.courses = []
     self.parser = Parser()

     # Both queues come from the same manager instance.
     self.manager = Manager()
     self.course_queue = self.manager.Queue()
     self.lect_queue = self.manager.Queue()
Esempio n. 16
0
def test_create_combined_data():
    """Check ``dot_product_data`` and ``filtered_data`` of a ``Processor``
    fed with input whose samples contain a single triple each."""
    raw = '0.123,-0.123,5;0.456,-0.789,0.111;-0.212,0.001,1;'
    processor = Processor(Parser(raw).parsed_data)

    expected_dot_product = [0.0, 0.0, 0.0005219529804999682]
    expected_filtered = [0, 0, 4.753597533351234e-05]

    assert processor.dot_product_data == expected_dot_product
    assert processor.filtered_data == expected_filtered
def beam_search(
        loader: torch.utils.data.DataLoader,
        model: Parser,
        cfg: DictConfig  # type: ignore
) -> FScore:
    """Run validation/testing with beam search.

    For every batch of *loader* the token embeddings are computed with
    ``model.encoder``, a ``Beam`` is built from them and grown until
    ``Beam.grow`` returns a truthy value. The batch's ground-truth trees and
    the beam's best trees are collected, and ``evalb`` is run over all of
    them at the end.

    Returns:
        The result of ``evalb`` for the collected trees.
    """

    model.eval()
    device, _ = get_device()
    gold, predicted = [], []
    progress = ProgressBar(max_value=len(loader))
    started_at = time()

    # batch entries fed to the encoder, in positional order
    encoder_keys = (
        "tokens_idx",
        "tags_idx",
        "valid_tokens_mask",
        "word_end_mask",
    )

    with torch.no_grad():  # type: ignore
        for batch_idx, data_batch in enumerate(loader):
            # calculate token embeddings
            encoder_inputs = [
                data_batch[key].to(device=device, non_blocking=True)
                for key in encoder_keys
            ]
            tokens_emb = model.encoder(*encoder_inputs)

            # initialize the beam
            beam = Beam(
                data_batch["tokens_word"],
                data_batch["tags"],
                tokens_emb,
                model,
                cfg,
            )

            # keep growing the beam until the entire batch is finished
            finished = beam.grow()
            while not finished:
                finished = beam.grow()

            gold.extend(data_batch["trees"])
            predicted.extend(beam.best_trees())

            progress.update(batch_idx)

    evalb_dir = hydra.utils.to_absolute_path("./EVALB")
    f1_score = evalb(evalb_dir, gold, predicted)
    log.info("Time elapsed: %f" % (time() - started_at))
    return f1_score
Esempio n. 18
0
def test_create_separated_data():
    """Check ``parsed_data`` for input whose samples contain two
    ``|``-separated triples: each sample becomes a pair of float triples."""
    raw = '0.028,-0.072,5|0.129,-0.945,-5;0,-0.07,0.06|0.123,-0.947,5;0.2,-1,2|0.1,-0.9,3;'
    expected = [
        [[0.028, -0.072, 5.0], [0.129, -0.945, -5.0]],
        [[0.0, -0.07, 0.06], [0.123, -0.947, 5.0]],
        [[0.2, -1.0, 2.0], [0.1, -0.9, 3.0]],
    ]

    assert Parser(raw).parsed_data == expected
Esempio n. 19
0
def main(cfg: DictConfig) -> None:
    """The entry point for parsing user-provided texts.

    Loads the checkpoint at ``cfg.model_path``, restores the model, parses
    the texts in ``cfg.input`` batch by batch and prints one linearized tree
    per line to ``cfg.output`` (or to standard output when ``cfg.output`` is
    ``None``).

    Args:
        cfg: configuration; ``model_path`` and ``input`` must be set and
            ``language`` must be ``"english"`` or ``"chinese"``.
    """

    assert cfg.model_path is not None, "Need to specify model_path for testing."
    assert cfg.input is not None
    assert cfg.language in ("english", "chinese")
    log.info("\n" + OmegaConf.to_yaml(cfg))

    # load the model checkpoint
    model_path = hydra.utils.to_absolute_path(cfg.model_path)
    log.info("Loading the model from %s" % model_path)
    checkpoint = load_model(model_path)
    restore_hyperparams(checkpoint["cfg"], cfg)
    vocabs = checkpoint["vocabs"]

    model = Parser(vocabs, cfg)
    model.load_state_dict(checkpoint["model_state"])
    device, _ = get_device()
    model.to(device)
    log.info("\n" + str(model))
    log.info("#parameters = %d" % sum(p.numel() for p in model.parameters()))

    input_file = hydra.utils.to_absolute_path(cfg.input)
    ds = UserProvidedTexts(input_file, cfg.language, vocabs, cfg.encoder)
    loader = DataLoader(
        ds,
        batch_size=cfg.eval_batch_size,
        collate_fn=form_batch,
        num_workers=cfg.num_workers,
        pin_memory=torch.cuda.is_available(),
    )

    env = Environment(loader, model.encoder, subbatch_max_tokens=9999999)
    state = env.reset()
    if cfg.output is None:
        oup = sys.stdout
    else:
        oup = open(hydra.utils.to_absolute_path(cfg.output), "wt")
    time_start = time()

    try:
        with torch.no_grad():  # type: ignore
            while True:
                with torch.cuda.amp.autocast(cfg.amp):  # type: ignore
                    actions, _ = model(state)
                state, done = env.step(actions)
                if done:
                    for tree in env.pred_trees:
                        assert tree is not None
                        print(tree.linearize(), file=oup)
                    # load the next batch
                    try:
                        with torch.cuda.amp.autocast(cfg.amp):  # type: ignore
                            state = env.reset()
                    except EpochEnd:
                        # no next batch available (complete)
                        log.info("Time elapsed: %f" % (time() - time_start))
                        break
    finally:
        # Close (and thereby flush) the output file even when parsing fails;
        # sys.stdout is not ours to close.
        if oup is not sys.stdout:
            oup.close()

    if cfg.output is not None:
        log.info("Parse trees saved to %s" % cfg.output)
Esempio n. 20
0
def test_create_non_zero_data():
    """Run a recorded data file through Parser -> Processor -> Analyzer and
    check the analyzer's steps, delta, distance and time."""
    user = User('female', 167, 70)
    trial = Trial('walk 1', 100, 18)
    # Read through a context manager so the file handle is closed
    # deterministically instead of being leaked.
    with open('test/data/female-167-70_walk2-100-10.txt') as data_file:
        raw = data_file.read()
    parser = Parser(raw)
    processor = Processor(parser.parsed_data)
    analyzer = Analyzer(processor.filtered_data, user, trial)

    assert analyzer.steps == 10
    assert analyzer.delta == -8
    assert analyzer.distance == 700
    assert analyzer.time == 1037 / 100
Esempio n. 21
0
def test_create_combined_data():
    """Check ``parsed_data`` for input whose samples contain a single triple
    each: every sample is expected as a pair of triples."""
    raw = '0.123,-0.123,5;0.456,-0.789,0.111;-0.212,0.001,1;'

    first = [[0.123, -0.123, 5.0], [0, 0, 0]]
    second = [[0.456, -0.789, 0.111], [0, 0, 0]]
    third = [
        [-0.2120710948533322, 0.0011468544965549535, 0.9994625125426089],
        [
            7.109485333219216e-05, -0.00014685449655495343,
            0.0005374874573911294
        ],
    ]

    assert Parser(raw).parsed_data == [first, second, third]
Esempio n. 22
0
    def post(self):
        """Post endpoint for adding requests into the database.

        Headers: {Authorization: JWT jwt_token, ContentType: application/json}
        Body must be json with priority, target_date, product_area, client_name,
        title, description fields. Database must have matching client_name or
        will return json message 400 error. If error occurs while inserting into
        database will return json message and 500 error. On successful insert
        into database returns json of request and 201 code.

        Existing requests of the same client that occupy the requested
        priority, and the consecutive priorities after it, each have their
        priority increased by one after the new request is saved.
        """
        parser = Parser()
        parser.required_fields(
            'priority',
            'target_date',
            'product_area',
            'client_name',
            'title',
            'description')
        data = parser.parse_args()
        client_name = data['client_name']
        if not ClientModel.select(client_name):
            return {
                'message': 'Client does not exist'}, 400
        request = RequestModel(**data)
        priority = data['priority']
        update_list = []
        try:
            # Walk the run of consecutive priorities that are already taken,
            # querying each priority once.
            occupant = RequestModel.select_same_priority(client_name, priority)
            while occupant:
                update_list.append(occupant)
                priority += 1
                occupant = RequestModel.select_same_priority(
                    client_name, priority)
            request.save_to_db()
            for req in update_list:
                req.priority += 1
                req.save_to_db()
        except Exception:
            # ``Exception`` rather than ``BaseException``: KeyboardInterrupt
            # and SystemExit must not be turned into a 500 response.
            return {'message': 'Something went wrong'}, 500
        return request.json(), 201
Esempio n. 23
0
def ajax():
    """Answer the question posted in the ``userText`` form field.

    The question is cleaned with ``Parser.clean``, located through
    ``ApiGoogle.api_reading`` and, when an address and coordinates come
    back, described through ``ApiWikipedia``. The resulting payload is
    passed to ``jsonify``: a success payload when every lookup produced a
    value, a fallback payload otherwise.
    """
    user_text = request.form["userText"]

    logger.debug("Question posée :" + user_text)

    cleaned = Parser(user_text).clean()
    adress, coo = ApiGoogle().api_reading(cleaned)

    payload = None
    if adress and coo:
        api_wiki = ApiWikipedia()
        page_id = api_wiki.api_get_page_id(**coo)

        if page_id:
            extract, url = api_wiki.api_get_extract(page_id)
            payload = {
                "status": True,
                "question": user_text,
                "article": extract,
                "coords": coo,
                "url": url,
                "adress": adress,
                "response": "Voilà l'endroit demandé mon petit !"
            }

    if payload is None:
        # at least one lookup came back empty
        payload = {
            "question": user_text,
            "status": False,
            "response": "Je n'ai pas la reponse à la question..!!"
        }

    return jsonify(payload)
class HttpBridge:
    """Asynchronous client for the cardano-http-bridge HTTP endpoints.

    URLs are built by joining ``Network().network_url`` with the endpoint
    path; requests go through an ``AsyncHTTPClient`` and block/epoch bodies
    are decoded with ``Parser``.
    """

    def __init__(self):
        self.network_url = Network().network_url
        self.parser = Parser()
        self.client = AsyncHTTPClient()
        self.logger = get_logger('http-bridge')

    async def _fetch(self, endpoint_url: str, **fetch_kwargs):
        """Run ``self.client.fetch`` and report a refused connection with a
        dedicated message; any other client error is re-raised unchanged."""
        try:
            return await self.client.fetch(endpoint_url, **fetch_kwargs)
        except HTTPClientError as e:
            # NOTE(review): compares ``e.code`` with a string -- confirm the
            # client really reports refused connections this way.
            if e.code == 'ECONNREFUSED':
                raise Exception(
                    'cardano-http-bridge is not accessible (ECONNREFUSED)'
                ) from e

            raise

    async def get(self, path: str, params=None):
        """GET *path* relative to the network url and return the response.

        Args:
            path: endpoint path, joined onto ``self.network_url``.
            params: optional mapping of query parameters; when non-empty it
                is url-encoded and appended to the request url.
        """
        from urllib.parse import urlencode

        # ``None`` default instead of a shared mutable ``{}``.
        params = {} if params is None else params
        endpoint_url = urljoin(self.network_url, path)
        self.logger.info('GET %s params: %s', endpoint_url, params)
        if params:
            # Previously the parameters were logged but never sent.
            separator = '&' if '?' in endpoint_url else '?'
            endpoint_url = endpoint_url + separator + urlencode(params)
        return await self._fetch(endpoint_url, method='GET')

    async def post(self, path: str, data: str):
        """POST *data* as the request body to *path* and return the response."""
        endpoint_url = urljoin(self.network_url, path)
        self.logger.info('POST %s data: %s', endpoint_url, data)
        return await self._fetch(endpoint_url, method='POST', body=data)

    async def get_json(self, path: str):
        """GET *path* and return its body decoded as JSON.

        Raises:
            Exception: when the body cannot be decoded; the message carries
                the first 100 characters of the body.
        """
        resp = await self.get(path)
        try:
            return json.loads(resp.body)
        except Exception as e:
            # keep the decoding error as the cause for easier debugging
            raise Exception(
                'invalid json resp: %s' % str(resp.body)[:100]) from e

    async def get_tip(self):
        """Return the decoded JSON of the ``tip`` endpoint."""
        return await self.get_json('tip')

    async def post_signed_tx(self, payload: str):
        """POST *payload* to ``txs/signed`` and return the response."""
        return await self.post('txs/signed', payload)

    async def get_epoch(self, id: int):
        """Return the decoded JSON of ``epoch/<id>``."""
        return await self.get_json(f'epoch/{id}')

    async def get_block(self, id: str):
        """Return the decoded JSON of ``block/<id>``."""
        return await self.get_json(f'block/{id}')

    async def get_genesis(self, hash: str):
        """Return the decoded JSON of ``genesis/<hash>``."""
        return await self.get_json(f'genesis/{hash}')

    async def get_status(self):
        """Return the decoded JSON of the ``status`` endpoint."""
        return await self.get_json('status')

    async def get_block_by_height(self, height: int):
        """Fetch ``height/<height>`` and return ``Parser.parse_block`` of the
        response body."""
        resp = await self.get(f'height/{height}')
        return self.parser.parse_block(resp.body)

    async def get_parsed_epoch_by_id(self, epoch_id: int, is_omit_ebb=False):
        """Fetch ``epoch/<epoch_id>`` and return ``Parser.parse_epoch`` of
        the response body, passing *is_omit_ebb* as the ``omitEbb`` option."""
        resp = await self.get(f'epoch/{epoch_id}')
        blocks_iterator = self.parser.parse_epoch(
            resp.body, {'omitEbb': is_omit_ebb})

        return blocks_iterator
 def __init__(self):
     """Store the network url, a ``Parser``, an ``AsyncHTTPClient`` and the
     ``http-bridge`` logger on the instance."""
     network = Network()
     self.network_url = network.network_url
     self.parser = Parser()
     self.client = AsyncHTTPClient()
     self.logger = get_logger('http-bridge')
Esempio n. 26
0
def test_get_lower_cases():
    """Check that ``get_lower_cases`` returns the sentence in lower case."""
    lowered = Parser("Hello").get_lower_cases()
    assert lowered == "hello"
Esempio n. 27
0
def test_remove_spaces():
    """Check the output of ``remove_spaces``: the expected value has neither
    the surrounding spaces nor the apostrophe of the input."""
    parser = Parser("  Hello  ")
    trimmed = parser.remove_spaces("  Hello l'o  ")
    assert trimmed == "Hello lo"
Esempio n. 28
0
def test_create_parser():
    """Check that a ``Parser`` keeps the sentence it was created with."""
    empty_sentence = ""
    parser = Parser(empty_sentence)
    assert parser.sentence == ""
Esempio n. 29
0
 def feed(self):
     """Run ``self.data`` through the Parser -> Processor -> Analyzer chain,
     keeping each stage on the instance."""
     self.parser = Parser(self.data)

     parsed = self.parser.parsed_data
     self.processor = Processor(parsed)

     filtered = self.processor.filtered_data
     self.analyzer = Analyzer(filtered, self.user, self.trial)
Esempio n. 30
0
def main(cfg: DictConfig) -> None:
    """The entry point for testing.

    Restores the checkpoint at ``cfg.model_path``, builds the validation and
    test data loaders, and logs the scores of the model on both. The test
    set is evaluated with ``beam_search`` when ``cfg.beam_size > 1`` and
    with ``validate`` otherwise.
    """

    def report(template: str, score) -> None:
        # Log the four score fields with the given message template.
        log.info(template % (
            score.fscore,
            score.complete_match,
            score.precision,
            score.recall,
        ))

    def build_loader(path, split):
        # Both loaders share everything but the data path and split name.
        loader, _ = create_dataloader(
            hydra.utils.to_absolute_path(path),
            split,
            cfg.encoder,
            vocabs,
            cfg.eval_batch_size,
            cfg.num_workers,
        )
        return loader

    assert cfg.model_path is not None, "Need to specify model_path for testing."
    log.info("\n" + OmegaConf.to_yaml(cfg))

    # restore the hyperparameters used for training
    model_path = hydra.utils.to_absolute_path(cfg.model_path)
    log.info("Loading the model from %s" % model_path)
    checkpoint = load_model(model_path)
    restore_hyperparams(checkpoint["cfg"], cfg)

    # create dataloaders for validation and testing
    vocabs = checkpoint["vocabs"]
    loader_val = build_loader(cfg.path_val, "val")
    loader_test = build_loader(cfg.path_test, "test")

    # restore the trained model checkpoint
    model = Parser(vocabs, cfg)
    model.load_state_dict(checkpoint["model_state"])
    device, _ = get_device()
    model.to(device)
    log.info("\n" + str(model))
    n_params = sum(p.numel() for p in model.parameters())
    log.info("#parameters = %d" % n_params)

    # validation
    log.info("Validating..")
    report(
        "Validation F1 score: %.03f, Exact match: %.03f, Precision: %.03f, Recall: %.03f",
        validate(loader_val, model, cfg),
    )

    # testing
    log.info("Testing..")
    if cfg.beam_size > 1:
        log.info("Performing beam search..")
        evaluate = beam_search
    else:
        log.info("Running without beam search..")
        evaluate = validate
    report(
        "Testing F1 score: %.03f, Exact match: %.03f, Precision: %.03f, Recall: %.03f",
        evaluate(loader_test, model, cfg),
    )
Esempio n. 31
0
    Building only one graph for cross-lingual or cross-domain,
    Building data-collector(manage vocabs, embedds and datasets) and
    model-builder(manage session and subgraph) for each treebank or language,
    Global-Graph-Manager(base_model) manage each session

    """
    if gconfig.train and gconfig.data_model == 'seperate-data_seperate-model':
        models = []
        data_collectors = []
        for config in config_list:
            # set data loader for each treebank, load vocabs, embeddings and datasets
            data_loader = DataCollector(config, trainFLAG=True, devFLAG=True)
            data_collectors.append(data_loader)
            # build model
            if gconfig.task == 'parse':
                model = Parser(config, data_loader)
            elif gconfig.task == 'pos':
                model = Tagger(config, data_loader)
            else:
                raise TypeError("Do not support %s!" % (config.task))
            model.build()
            models.append(model)
        # global trainer
        trainer = BaseModel(models)
        trainer.init_session()
        trainer.train_models()
    """ Testing """
    if gconfig.test and gconfig.data_model == 'seperate-data_seperate-model':
        models = []
        data_collectors = []
        for config in config_list: