def printrules(rules):
    for rule in rules:
        if rule[2] == '!ssi':
            print(bc.CYAN, clean(rule[0]), '=>', clean(rule[1]), bc.RES)
        if rule[2] == 'ssi':
            print(bc.GREEN, clean(rule[0]), '<=>', clean(rule[1]), bc.RES)
    print('')
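# Usage sketch (hypothetical data): printrules expects an iterable of
# (left, right, kind) triples; kind selects the arrow and colour, and
# 'bc' and 'clean' are assumed to come from the surrounding module.
example_rules = [
    ('a', 'b', '!ssi'),  # one-way rule, printed cyan as: a => b
    ('c', 'd', 'ssi'),   # bidirectional rule, printed green as: c <=> d
]
printrules(example_rules)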
def main_menu():
    """Main loop with options"""
    while True:
        choice = input(MAIN_MENU)
        clean()
        if choice.lower() == "a":
            add_task()
        elif choice.lower() == "s":
            search_options()
        elif choice.lower() == "q":
            break
        else:
            print("'{}' is an invalid option! Please try again".format(choice))
def query(search: str) -> bytes:
    """Query DBpedia, return the response or exit with an error code."""
    search = tools.clean(search, "search")
    url = getAPIprefix() + search
    return tools.url_reader(url)
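# Usage sketch (hypothetical search term): query() cleans/encodes the
# term via tools.clean and returns the raw bytes of the DBpedia reply.
response = query("Ada Lovelace")
print(response[:200])  # peek at the start of the payload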
def __init__(self, url: str):
    """Initialize the story from its URL.

    Args:
        url (str): the URL to use.

    Raises:
        ValueError: if url is not a valid fanfiction.net story URL,
            or not the URL of an accessible story.
        urllib.error.HTTPError: if the page cannot be fetched.
    """
    if not tls.is_url(url):
        raise ValueError(f"{url} is not a valid fanfiction.net URL for a "
                         "story.")
    # Ensure the URL used is not a mobile one
    page = tls.get_page(url.replace('://m', '://www'))
    # Gather the information needed to build self.ifs
    try:
        text_id = tls.clean(re.search(c.TEXT_ID_REGEX, page).group(1))
    except AttributeError:
        raise ValueError(f"{url} is not the URL of an accessible story.")
    num_id = url.split('/')[4]
    tokens = Story._get_tokens(page)
    chap_count = Story._get_chap_count(tokens)
    status = Story._get_status(tokens)
    # Keys are sorted alphabetically
    self.ifs = {
        'auth': re.search(c.AUTHOR_REGEX, page).group(1),          # str
        'auth_id': re.search(c.AUTHOR_ID_REGEX, page).group(1),    # str
        'c_count': chap_count,                                     # int
        'chap': re.findall(c.CHAPTERS_REGEX, page)[:chap_count],   # list
        'lang': tokens.split(' - ')[2],                            # str
        'n_id': num_id,                                            # str
        'publ': re.search(c.PUBLISHED_REGEX, tokens).group(1),     # str
        'rating': tokens.split(' - ')[1],                          # str
        's_dir': f'{text_id}_{num_id}'.lower(),                    # str
        'smry': re.search(c.SUMMARY_REGEX, page).group(1),         # str
        'status': status,                                          # str
        't_id': text_id.lower(),                                   # str
        'title': re.search(c.STORY_TITLE_REGEX, page).group(1),    # str
        'tk': Story._insert_status(tokens, status),                # str
        'uni': Story._get_universe(page),                          # str
        'upd': Story._get_update(tokens),                          # str
        'url': f'{c.ROOT_URL}{num_id}/1/{text_id}',                # str
        'w_count': Story._get_words_count(tokens),                 # int
    }
def __init__(self, original, label=''):
    self.label = label
    self.original = original
    self.tokenized = t.clean(self.original.split())
    self.lemmas = []
    self.unknownLemmas = []
    self.unknownPostags = []
    self.bilemmas = []
    self.postags = []
    self.bipostags = []
    self.markers = []
    self.emendationLogs = []

def compute(self, bigrams=False):
    self.lemmas, self.unknownLemmas = t.lemmatize(self.tokenized)
    self.postags, self.unknownPostags = t.pos(self.lemmas)
    if bigrams:
        self.bilemmas = u.getNgrams(self.lemmas, min=2, max=3)
        self.bipostags = u.getNgrams(self.postags, min=2, max=3)
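# Usage sketch (class name and sentence are assumed, not from the
# source): compute() fills the lemma and POS-tag fields, and
# bigrams=True also builds the 2- and 3-gram lists via u.getNgrams.
doc = Sentence("ceci est une phrase", label='demo')  # class name assumed
doc.compute(bigrams=True)
print(doc.lemmas, doc.postags)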
def add_images(result: dict) -> dict:
    """Add image links via Wikidata Linked Data."""
    # TODO: add license information and show it in the result
    COMMONS = "https://commons.wikimedia.org/wiki/File:"
    images = {}
    for section in result:
        if "wikidata" in section:
            wikidata = lookup_hit(result[section], "wikidata")
            wikidata = json.loads(wikidata)
            qcode = result[section].split("/")[-1]
            image = wikidata["entities"][qcode]["claims"].get("P18")
            if image is None:
                # No image claim for this entity; skip this section
                continue
            imageref = image[0]["mainsnak"]["datavalue"].get("value")
            imageref = tools.clean(imageref.replace(" ", "_"), "url")
            imagelink = COMMONS + imageref
            images[section + "image"] = get_commonslink(imagelink)
    return {**result, **images}
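# Usage sketch (hypothetical data): any key containing "wikidata" is
# treated as an entity URI whose P18 (image) claim gets resolved to a
# Wikimedia Commons link under the key "<section>image".
example = {"wikidata": "http://www.wikidata.org/entity/Q42"}
enriched = add_images(example)
# enriched now also holds "wikidataimage" if Q42 carries a P18 claim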
'...'), end="") shutil.copyfile(file, os.path.join(dest_path, os.path.basename(file))) print(' Done!') if __name__ == '__main__': print(chr(27) + "[2J") print('Starting build!') t1 = time.time() production = False print('\n\n=== C L E A N ===') print(f'cleaning {data["options"]["prod"]}...', end='') tools.clean(os.path.expanduser(data['options']['prod'])) print(' Done!') if len(argv) > 1 and argv[1] == 'production': production = True s3 = None if production: ## AWS ## s3_bucket_name = data['options']['s3 bucket'] print('\n\n=== A W S ===\n\nCleaning {0}'.format(s3_bucket_name)) s3 = boto3.resource('s3') s3_bucket = s3.Bucket(s3_bucket_name) for key in s3_bucket.objects.all(): key.delete()
def predict(self, raw_text):
    text = tools.clean(raw_text)
    ngrams_text = ngrams.get_ngrams(text, self.ngram_dict.n)
    hist = self.ngram_dict.get_histogram(ngrams_text)
    cat = knn(hist, self.dataset, metrics.l2)
    return cat
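# Usage sketch (the instance is assumed to exist with a trained
# ngram_dict and a labelled dataset): predict() returns the
# nearest-neighbour category of the input text under the L2 metric.
category = classifier.predict("some raw text to categorize")
print(category)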
def run(self):
    if self.dim.get() == "" or self.T.get() == "":
        return
    self.txt.delete('1.0', tk.END)
    self.pbl = 0
    if self.typeproblem.get() == "Transport":
        self.pbl = transport_class.transport()
    elif self.typeproblem.get() == "Burgers":
        self.pbl = burgers_class.burgers()
    else:
        self.pbl = shallow_water_class.shallow_water()
    self.disp("Equation : {}".format(self.typeproblem.get()))
    self.pbl.disp = self.disp
    disp = self.disp
    pdata = 0
    if int(self.load_vtk.get()) == 0:
        if int(self.dim.get()) == 1:
            pdata = tools.clean(tools.gmsh1D([[0.], [1.]], [0.01, 0.01]))
        if int(self.dim.get()) == 2:
            pdata = tools.clean(tools.gmsh2D(
                [[0., 0., 0.], [1., 0., 0.], [1., 1., 0.], [0., 1., 0.]],
                [0.05, 0.05, 0.05, 0.05]))
    else:
        filename = filedialog.askopenfilename(
            initialdir=".", title="Select file",
            filetypes=(("vtk files", "*.vtk"), ("all files", "*.*")))
        self.disp("Load VTK : {}".format(filename))
        pdata = tools.readVTK(filename)
    self.pbl.setPdata(pdata)
    # The counts must actually be called; comparing the bound methods
    # themselves to 0 would always be False.
    if self.pbl.pdata.GetNumberOfCells() == 0 or self.pbl.pdata.GetNumberOfPoints() == 0:
        return
    self.pbl.buildInterfaces()
    self.pbl.defineCond_init()
    self.pbl.useCFL = False
    self.pbl.isPeriodic = False
    if self.cfl_checkbox.get() == 1:
        self.pbl.useCFL = True
    if self.periodic.get() == 1:
        self.pbl.isPeriodic = True
    if self.imposeBoundary.get() == 1:
        self.pbl.imposeBoundary = True
    self.pbl.dt = float(self.dt.get())
    self.pbl.T = float(self.T.get())
    self.pbl.cfl_nu = float(self.nu.get())
    self.pbl.name_numericalFlow = self.scheme.get()
    if self.scheme.get() == "upwind":
        self.pbl.numericalFlow = self.pbl.up_wind
    elif self.scheme.get() == "laxfriedrichs":
        self.pbl.numericalFlow = self.pbl.lax_friedrichs
    elif self.scheme.get() == "laxwendroff":
        self.pbl.numericalFlow = self.pbl.lax_wendroff
    elif self.scheme.get() == "godunov":
        self.pbl.numericalFlow = self.pbl.godunov
    elif self.scheme.get() == "maccormak":
        self.pbl.numericalFlow = self.pbl.mac_cormak
    else:
        self.pbl.numericalFlow = self.pbl.up_wind
    self.pbl.analysis()
    self.pbl.iterate(self.fun_set_progress)
    self.plot()