def test_remove_all_accents():
    """Test transforming accented characters into unaccented ones"""
    p = Parser("éèêëãàäâåîïìöôòõñûüÿ")
    assert p.remove_all_accents(
        "éèêëãàäâåîïìöôòõñûüÿ") == "eeeeaaaaaiiioooonuuy"
def test_Parser_delete_article():
    """Test article deletion function"""
    question = 'le musee du louvre?'
    parser = Parser(question)
    result = parser._delete_article(question)
    assert result == "musee du louvre?"
def test_take_away_special_characters():
    """Test removing special characters from a sentence"""
    p = Parser("Hello my name is Jon ,?;.:/!-*+%$€_£¤=@|°}]¨[(){'#~&²")
    assert p.remove_all_special_characters(
        "Hello my name is Jon ,?;.:/!-*+%$€_£¤=@|°}]¨[(){'#~&²"
    ) == "Hello my name is Jon "
def test_Parser_clean():
    """Cleaning function test"""
    question = 'Ou se trouve le musée du Louvre?'
    parser = Parser(question)
    result = parser.clean()
    assert result == "musee du louvre"
def grandpy(user_text):
    """Process the user's question and build the response payload"""
    data = {"status": False}
    parser = Parser(user_text)
    result = parser.clean()
    api_google = ApiGoogle()
    adress, coo = api_google.api_reading(result)
    if adress and coo:
        api_wiki = ApiWikipedia()
        page_id = api_wiki.api_get_page_id(**coo)
        if page_id:
            extract, url = api_wiki.api_get_extract(page_id)
            data = {
                "status": True,
                "question": user_text,
                "article": extract,
                "coords": coo,
                "url": url,
                "adress": adress,
                "response": "Voilà l'endroit demandé mon petit !"
            }
    if not data.get("status"):
        data = {
            "question": user_text,
            "status": False,
            "response": "Je n'ai pas la reponse à la question..!!"
        }
    return data
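# A brief usage sketch for grandpy(); the question string is only an example,
# and the keys checked below are exactly the ones built in the function above:
payload = grandpy("Où se trouve le musée du Louvre ?")
if payload["status"]:
    print(payload["adress"], payload["coords"], payload["url"])
else:
    print(payload["response"])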
def test_remove_stop_words():
    """Test removing stop words from a sentence"""
    sentence = "salut grandpy a quel endroit se trouve le musee du louvre"
    p = Parser(sentence)
    assert p.remove_stop_words(sentence) == "musee louvre"
def find_place_in_sentence(text):
    """Return the place the user's text is asking about"""
    parser = Parser(text)
    parser.capture_regular_expression(text)
    address_ask_by_user = parser.address
    return address_ask_by_user
def test_Parser_extract_place():
    """Place extraction function test"""
    question = 'ou se trouve le musee du louvre?'
    parser = Parser(question)
    result = parser._extract_place(question)
    assert result == " le musee du louvre"
def test_capture_regular_expression():
    """Test capturing the location and the words that introduce a question about a place"""
    sentence = ("Salut grandpy! Comment s'est passé ta soirée avec Grandma hier soir? "
                "Au fait, pendant que j'y pense, pourrais-tu m'indiquer où se trouve "
                "le musée d'art et d'histoire de Fribourg, s'il te plaît?")
    p = Parser(sentence)
    assert p.capture_regular_expression(
        sentence) == " où se trouve le musée d'art et d'histoire de Fribourg"
def get(self):
    interval = int(self.request.get('interval', self.default_interval))
    poller = Poller(interval)
    # search all tweets
    tweets = poller.search()
    # parse them
    parser = Parser()
    parsed_tweets = filter(None, [parser.parse_tweet(tweet) for tweet in tweets])
    # and finally write them to the datastore
    Tweet.multi_save(parsed_tweets)
def test_create_string_values_parses_to_0s():
    data = "1,2,foo;"
    parser = Parser(data)
    assert parser.parsed_data == [[[1.0, 2.0, 0.0], [0, 0, 0]]]

    data = "1,2,foo|4,bar,6;"
    parser = Parser(data)
    assert parser.parsed_data == [[[1.0, 2.0, 0.0], [4.0, 0.0, 6.0]]]
def validate(loader: torch.utils.data.DataLoader, model: Parser, cfg: DictConfig) -> FScore:  # type: ignore
    "Run validation/testing without beam search"
    model.eval()
    # testing requires far less GPU memory than training,
    # so there is no need to split a batch into multiple subbatches
    env = Environment(loader, model.encoder, subbatch_max_tokens=9999999)
    state = env.reset()
    pred_trees = []
    gt_trees = []
    time_start = time()

    with torch.no_grad():  # type: ignore
        while True:
            with torch.cuda.amp.autocast(cfg.amp):  # type: ignore
                actions, _ = model(state)

            if cfg.decoder == "graph":
                # actions for a single step
                state, done = env.step(actions)
                if not done:
                    continue
            else:
                assert cfg.decoder == "sequence"
                # actions for all steps
                for n_step in itertools.count():
                    a_t = [
                        action_seq[n_step]
                        for action_seq in actions
                        if len(action_seq) > n_step
                    ]
                    _, done = env.step(a_t)
                    if done:
                        break

            pred_trees.extend(env.pred_trees)
            gt_trees.extend(env.gt_trees)

            # load the next batch
            try:
                with torch.cuda.amp.autocast(cfg.amp):  # type: ignore
                    state = env.reset()
            except EpochEnd:
                # no next batch available (complete)
                f1_score = evalb(
                    hydra.utils.to_absolute_path("./EVALB"), gt_trees, pred_trees  # type: ignore
                )
                log.info("Time elapsed: %f" % (time() - time_start))
                return f1_score
def post():
    """Post endpoint for registering users into the database.

    Takes params username and password to put into the database. Usernames
    cannot be duplicated; attempting to reuse one returns a json message and
    a 400 error. On successful user creation, returns a json message and a
    201 code."""
    parser = Parser()
    parser.required_fields('username', 'password')
    data = parser.parse_args()
    if UserModel.find_by_username(data['username']):
        return {'message': 'User already exists'}, 400
    UserModel(**data).save_to_db()
    return {'message': 'User created successfully'}, 201
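# A hedged client-side sketch for the registration endpoint above; the host and
# the "/register" route are assumptions, since the actual routing is not shown:
import requests

resp = requests.post("http://localhost:5000/register",
                     json={"username": "alice", "password": "secret"})
print(resp.status_code, resp.json())  # 201 on success, 400 if the username already exists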
def test_create_separated_data():
    data = '0.028,-0.072,5|0.129,-0.945,-5;0,-0.07,0.06|0.123,-0.947,5;0.2,-1,2|0.1,-0.9,3;'
    parser = Parser(data)
    processor = Processor(parser.parsed_data)
    assert processor.dot_product_data == [-24.928348, 0.36629, 6.92]
    assert processor.filtered_data == [0, 0, -1.7004231121083724]
def __init__(self):
    super(Downloader, self).__init__()
    self.courses = []
    self.parser = Parser()
    self.manager = Manager()
    self.course_queue = self.manager.Queue()
    self.lect_queue = self.manager.Queue()
def test_create_combined_data():
    data = '0.123,-0.123,5;0.456,-0.789,0.111;-0.212,0.001,1;'
    parser = Parser(data)
    processor = Processor(parser.parsed_data)
    assert processor.dot_product_data == [0.0, 0.0, 0.0005219529804999682]
    assert processor.filtered_data == [0, 0, 4.753597533351234e-05]
def beam_search(
    loader: torch.utils.data.DataLoader, model: Parser, cfg: DictConfig  # type: ignore
) -> FScore:
    "Run validation/testing with beam search"
    model.eval()
    device, _ = get_device()
    gt_trees = []
    pred_trees = []
    bar = ProgressBar(max_value=len(loader))
    time_start = time()

    with torch.no_grad():  # type: ignore
        for i, data_batch in enumerate(loader):
            # calculate token embeddings
            tokens_emb = model.encoder(
                data_batch["tokens_idx"].to(device=device, non_blocking=True),
                data_batch["tags_idx"].to(device=device, non_blocking=True),
                data_batch["valid_tokens_mask"].to(device=device, non_blocking=True),
                data_batch["word_end_mask"].to(device=device, non_blocking=True),
            )
            # initialize the beam
            beam = Beam(
                data_batch["tokens_word"],
                data_batch["tags"],
                tokens_emb,
                model,
                cfg,
            )
            # keep executing actions and updating the beam until the entire batch is finished
            while not beam.grow():
                pass
            gt_trees.extend(data_batch["trees"])
            pred_trees.extend(beam.best_trees())
            bar.update(i)

    f1_score = evalb(hydra.utils.to_absolute_path("./EVALB"), gt_trees, pred_trees)
    log.info("Time elapsed: %f" % (time() - time_start))
    return f1_score
def test_create_separated_data():
    data = '0.028,-0.072,5|0.129,-0.945,-5;0,-0.07,0.06|0.123,-0.947,5;0.2,-1,2|0.1,-0.9,3;'
    parser = Parser(data)
    assert parser.parsed_data == [
        [[0.028, -0.072, 5.0], [0.129, -0.945, -5.0]],
        [[0.0, -0.07, 0.06], [0.123, -0.947, 5.0]],
        [[0.2, -1.0, 2.0], [0.1, -0.9, 3.0]],
    ]
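# The test above illustrates the accelerometer string format: samples are
# separated by ";", user/gravity readings within a sample by "|", and axes by
# ",". Below is a minimal, hypothetical sketch of how such a string could be
# split into floats; the real Parser also maps invalid values like "foo" to 0,
# which this sketch does not handle:
def split_accelerometer_string(data):
    samples = []
    for sample in data.rstrip(';').split(';'):
        readings = [[float(axis) for axis in reading.split(',')]
                    for reading in sample.split('|')]
        samples.append(readings)
    return samples

# split_accelerometer_string('0.2,-1,2|0.1,-0.9,3;')
# -> [[[0.2, -1.0, 2.0], [0.1, -0.9, 3.0]]]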
def main(cfg: DictConfig) -> None:
    "The entry point for parsing user-provided texts"
    assert cfg.model_path is not None, "Need to specify model_path for testing."
    assert cfg.input is not None
    assert cfg.language in ("english", "chinese")
    log.info("\n" + OmegaConf.to_yaml(cfg))

    # load the model checkpoint
    model_path = hydra.utils.to_absolute_path(cfg.model_path)
    log.info("Loading the model from %s" % model_path)
    checkpoint = load_model(model_path)
    restore_hyperparams(checkpoint["cfg"], cfg)
    vocabs = checkpoint["vocabs"]

    model = Parser(vocabs, cfg)
    model.load_state_dict(checkpoint["model_state"])
    device, _ = get_device()
    model.to(device)
    log.info("\n" + str(model))
    log.info("#parameters = %d" % sum([p.numel() for p in model.parameters()]))

    input_file = hydra.utils.to_absolute_path(cfg.input)
    ds = UserProvidedTexts(input_file, cfg.language, vocabs, cfg.encoder)
    loader = DataLoader(
        ds,
        batch_size=cfg.eval_batch_size,
        collate_fn=form_batch,
        num_workers=cfg.num_workers,
        pin_memory=torch.cuda.is_available(),
    )

    env = Environment(loader, model.encoder, subbatch_max_tokens=9999999)
    state = env.reset()
    oup = (sys.stdout if cfg.output is None else open(
        hydra.utils.to_absolute_path(cfg.output), "wt"))
    time_start = time()

    with torch.no_grad():  # type: ignore
        while True:
            with torch.cuda.amp.autocast(cfg.amp):  # type: ignore
                actions, _ = model(state)
            state, done = env.step(actions)
            if done:
                for tree in env.pred_trees:
                    assert tree is not None
                    print(tree.linearize(), file=oup)
                # pred_trees.extend(env.pred_trees)
                # load the next batch
                try:
                    with torch.cuda.amp.autocast(cfg.amp):  # type: ignore
                        state = env.reset()
                except EpochEnd:
                    # no next batch available (complete)
                    log.info("Time elapsed: %f" % (time() - time_start))
                    break

    if cfg.output is not None:
        log.info("Parse trees saved to %s" % cfg.output)
def test_create_non_zero_data():
    user = User('female', 167, 70)
    trial = Trial('walk 1', 100, 18)
    parser = Parser(open('test/data/female-167-70_walk2-100-10.txt').read())
    processor = Processor(parser.parsed_data)
    analyzer = Analyzer(processor.filtered_data, user, trial)
    assert analyzer.steps == 10
    assert analyzer.delta == -8
    assert analyzer.distance == 700
    assert analyzer.time == 1037 / 100
def test_create_combined_data():
    data = '0.123,-0.123,5;0.456,-0.789,0.111;-0.212,0.001,1;'
    parser = Parser(data)
    assert parser.parsed_data == [
        [[0.123, -0.123, 5.0], [0, 0, 0]],
        [[0.456, -0.789, 0.111], [0, 0, 0]],
        [[-0.2120710948533322, 0.0011468544965549535, 0.9994625125426089],
         [7.109485333219216e-05, -0.00014685449655495343, 0.0005374874573911294]],
    ]
def post(self):
    """Post endpoint for adding requests into the database.

    Headers: {Authorization: JWT jwt_token, ContentType: application/json}
    Body must be json with priority, target_date, product_area, client_name,
    title, and description fields. The database must have a matching
    client_name, or a json message and a 400 error are returned. If an error
    occurs while inserting into the database, a json message and a 500 error
    are returned. On a successful insert, returns the json of the request and
    a 201 code."""
    parser = Parser()
    parser.required_fields(
        'priority', 'target_date', 'product_area',
        'client_name', 'title', 'description')
    data = parser.parse_args()
    if not ClientModel.select(data['client_name']):
        return {'message': 'Client does not exist'}, 400
    request = RequestModel(**data)
    client_name = data['client_name']
    priority = data['priority']
    update_list = []
    try:
        while RequestModel.select_same_priority(client_name, priority):
            update_list.append(
                RequestModel.select_same_priority(client_name, priority))
            priority += 1
        request.save_to_db()
        for req in update_list:
            req.priority += 1
            req.save_to_db()
    except BaseException:
        return {'message': 'Something went wrong'}, 500
    return request.json(), 201
def ajax():
    """AJAX endpoint: answer the user's question with a place and a Wikipedia extract."""
    data = {"status": False}
    user_text = request.form["userText"]
    logger.debug("Question posée :" + user_text)
    parser = Parser(user_text)
    result = parser.clean()
    api_google = ApiGoogle()
    adress, coo = api_google.api_reading(result)
    if adress and coo:
        api_wiki = ApiWikipedia()
        page_id = api_wiki.api_get_page_id(**coo)
        if page_id:
            extract, url = api_wiki.api_get_extract(page_id)
            data = {
                "status": True,
                "question": user_text,
                "article": extract,
                "coords": coo,
                "url": url,
                "adress": adress,
                "response": "Voilà l'endroit demandé mon petit !"
            }
    if not data.get("status"):
        data = {
            "question": user_text,
            "status": False,
            "response": "Je n'ai pas la reponse à la question..!!"
        }
    return jsonify(data)
class HttpBridge:
    def __init__(self):
        self.network_url = Network().network_url
        self.parser = Parser()
        self.client = AsyncHTTPClient()
        self.logger = get_logger('http-bridge')

    async def get(self, path: str, params={}):
        endpoint_url = urljoin(self.network_url, path)
        self.logger.info('GET %s params: %s', endpoint_url, params)
        try:
            resp = await self.client.fetch(endpoint_url, method='GET')
            return resp
        except HTTPClientError as e:
            if e.code == 'ECONNREFUSED':
                raise Exception('cardano-http-bridge is not accessible (ECONNREFUSED)')
            raise

    async def post(self, path: str, data: str):
        endpoint_url = urljoin(self.network_url, path)
        self.logger.info('POST %s data: %s', endpoint_url, data)
        try:
            resp = await self.client.fetch(endpoint_url, method='POST', body=data)
            return resp
        except HTTPClientError as e:
            if e.code == 'ECONNREFUSED':
                raise Exception('cardano-http-bridge is not accessible (ECONNREFUSED)')
            raise

    async def get_json(self, path: str):
        resp = await self.get(path)
        try:
            return json.loads(resp.body)
        except Exception:
            raise Exception('invalid json resp: %s' % str(resp.body)[:100])

    async def get_tip(self):
        return await self.get_json('tip')

    async def post_signed_tx(self, payload: str):
        return await self.post('txs/signed', payload)

    async def get_epoch(self, id: int):
        return await self.get_json(f'epoch/{id}')

    async def get_block(self, id: str):
        return await self.get_json(f'block/{id}')

    async def get_genesis(self, hash: str):
        return await self.get_json(f'genesis/{hash}')

    async def get_status(self):
        return await self.get_json('status')

    async def get_block_by_height(self, height: int):
        resp = await self.get(f'height/{height}')
        return self.parser.parse_block(resp.body)

    async def get_parsed_epoch_by_id(self, epoch_id: int, is_omit_ebb=False):
        resp = await self.get(f'epoch/{epoch_id}')
        blocks_iterator = self.parser.parse_epoch(resp.body, {'omitEbb': is_omit_ebb})
        return blocks_iterator
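# A hedged usage sketch for HttpBridge, assuming an asyncio-compatible event
# loop (Tornado's AsyncHTTPClient integrates with asyncio); only methods
# defined on the class above are used:
import asyncio

async def show_tip():
    bridge = HttpBridge()
    tip = await bridge.get_tip()  # JSON decoded from the 'tip' endpoint
    print(tip)

# asyncio.run(show_tip())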
def test_get_lower_cases():
    """Test transforming every letter to lowercase"""
    p = Parser("Hello")
    assert p.get_lower_cases() == "hello"
def test_remove_spaces():
    """Test removing spaces from a sentence"""
    p = Parser(" Hello ")
    assert p.remove_spaces(" Hello l'o ") == "Hello lo"
def test_create_parser():
    """Test creating a Parser object"""
    p = Parser("")
    assert p.sentence == ""
def feed(self):
    self.parser = Parser(self.data)
    self.processor = Processor(self.parser.parsed_data)
    self.analyzer = Analyzer(self.processor.filtered_data, self.user, self.trial)
def main(cfg: DictConfig) -> None:
    "The entry point for testing"
    assert cfg.model_path is not None, "Need to specify model_path for testing."
    log.info("\n" + OmegaConf.to_yaml(cfg))

    # restore the hyperparameters used for training
    model_path = hydra.utils.to_absolute_path(cfg.model_path)
    log.info("Loading the model from %s" % model_path)
    checkpoint = load_model(model_path)
    restore_hyperparams(checkpoint["cfg"], cfg)

    # create dataloaders for validation and testing
    vocabs = checkpoint["vocabs"]
    loader_val, _ = create_dataloader(
        hydra.utils.to_absolute_path(cfg.path_val),
        "val",
        cfg.encoder,
        vocabs,
        cfg.eval_batch_size,
        cfg.num_workers,
    )
    loader_test, _ = create_dataloader(
        hydra.utils.to_absolute_path(cfg.path_test),
        "test",
        cfg.encoder,
        vocabs,
        cfg.eval_batch_size,
        cfg.num_workers,
    )

    # restore the trained model checkpoint
    model = Parser(vocabs, cfg)
    model.load_state_dict(checkpoint["model_state"])
    device, _ = get_device()
    model.to(device)
    log.info("\n" + str(model))
    log.info("#parameters = %d" % sum([p.numel() for p in model.parameters()]))

    # validation
    log.info("Validating..")
    f1_score = validate(loader_val, model, cfg)
    log.info(
        "Validation F1 score: %.03f, Exact match: %.03f, Precision: %.03f, Recall: %.03f"
        % (
            f1_score.fscore,
            f1_score.complete_match,
            f1_score.precision,
            f1_score.recall,
        ))

    # testing
    log.info("Testing..")
    if cfg.beam_size > 1:
        log.info("Performing beam search..")
        f1_score = beam_search(loader_test, model, cfg)
    else:
        log.info("Running without beam search..")
        f1_score = validate(loader_test, model, cfg)
    log.info(
        "Testing F1 score: %.03f, Exact match: %.03f, Precision: %.03f, Recall: %.03f"
        % (
            f1_score.fscore,
            f1_score.complete_match,
            f1_score.precision,
            f1_score.recall,
        ))
Building only one graph for cross-lingual or cross-domain,
Building a data-collector (manages vocabs, embeddings and datasets) and a
model-builder (manages session and subgraph) for each treebank or language,
Global-Graph-Manager (base_model) manages each session
"""
if gconfig.train and gconfig.data_model == 'seperate-data_seperate-model':
    models = []
    data_collectors = []
    for config in config_list:
        # set data loader for each treebank, load vocabs, embeddings and datasets
        data_loader = DataCollector(config, trainFLAG=True, devFLAG=True)
        data_collectors.append(data_loader)
        # build model
        if gconfig.task == 'parse':
            model = Parser(config, data_loader)
        elif gconfig.task == 'pos':
            model = Tagger(config, data_loader)
        else:
            raise TypeError("Do not support %s!" % (config.task))
        model.build()
        models.append(model)
    # global trainer
    trainer = BaseModel(models)
    trainer.init_session()
    trainer.train_models()

"""
Testing
"""
if gconfig.test and gconfig.data_model == 'seperate-data_seperate-model':
    models = []
    data_collectors = []
    for config in config_list: