def __init__(self, title_html):
    """Set up a book from the URL of its chapter index page.

    Args:
        title_html: Index URL of the form
            ``https://m.wuxiaworld.co/<slug>/all.html``. It is stored as
            ``index_iri``; the ``<slug>`` part, passed through
            ``sanitize_string``, becomes ``title``.

    Raises:
        ValueError: If ``title_html`` does not contain an index URL of the
            expected form.
    """
    self.index_iri = title_html
    self.chapter_list = []

    # Dots are escaped so that they only match literal dots in the URL.
    match = re.search(r'https://m\.wuxiaworld\.co/(.*)/all\.html', title_html)
    if match is None:
        # Fail with a message that names the offending value instead of an
        # AttributeError on ``None.group``.
        raise ValueError(
            'Not a wuxiaworld index URL: {!r}'.format(title_html))
    self.title = sanitize_string(match.group(1))

    logging.basicConfig(filename='book.log', level=logging.DEBUG)
def read_config():
    """Load the settings stored in ``config/config.json`` into module globals.

    The values under the ``"configuration"`` key are copied into
    ``DEFAULT_DIR`` (``PATH``), ``FILTERS`` (``FILTER``), ``OPTION_SELECTED``
    (``OPTION``), ``RADIO_SELECTED`` (``RENAME``) and ``SEGMENT_SELECTED``
    (``SEGMENT``). Afterwards all five values are passed together, as a list,
    through ``utils.sanitize_string`` and the results are stored back.

    A ``ValueError`` while reading (e.g. malformed JSON) is ignored, in which
    case the globals keep whatever values they had before the call.
    """
    global DEFAULT_DIR, FILTERS, OPTION_SELECTED, RADIO_SELECTED, SEGMENT_SELECTED

    try:
        # The context manager closes the file even when json.load() raises.
        with open(opj('config/config.json')) as config_file:
            data = json.load(config_file)

        # default_dir
        DEFAULT_DIR = data["configuration"]["PATH"]
        # filter
        FILTERS = data["configuration"]["FILTER"]
        # history_option
        OPTION_SELECTED = data["configuration"]["OPTION"]
        # rename option
        RADIO_SELECTED = data["configuration"]["RENAME"]
        # segment option
        SEGMENT_SELECTED = data["configuration"]["SEGMENT"]
    except ValueError:
        # Deliberate best effort: an unparsable file leaves the current
        # values untouched.
        pass

    # Trailing extra whitespaces
    var = [DEFAULT_DIR, FILTERS, OPTION_SELECTED, RADIO_SELECTED, SEGMENT_SELECTED]
    DEFAULT_DIR, FILTERS, OPTION_SELECTED, RADIO_SELECTED, SEGMENT_SELECTED = utils.sanitize_string(var)
def parse(text):
    """Parse configuration file.

    Every line is first passed through ``utils.sanitize_string`` with ``#``
    and ``;`` as comment starters. Lines that are empty afterwards are
    skipped, ``[name]`` lines switch the current section, and ``key = value``
    lines are appended to the current section as ``(key, value)`` tuples.
    Keys seen before any section header go to ``'DEFAULT'``. A section only
    appears in the result once it has received at least one key.

    >>> parse(EXAMPLE_TEXT)
    {'DEFAULT': [('foo', 'no')], 'hi': [('foo', '1'), ('bar', '2'), ('baz', '3')], 'hello': [('foo', '1'), ('foo', '${hi:bar}'), ('bar', 'baz')]}

    Args:
        text: Full contents of the configuration file.

    Returns:
        Dict mapping section names to lists of ``(key, value)`` tuples, in
        file order. Repeated keys are kept.

    Raises:
        SyntaxError: If a non-empty line is neither a section header nor a
            key-value pair. The message carries the 1-based line number.
    """
    current_section = 'DEFAULT'
    sections = {'DEFAULT': []}

    # Count from 1 so that reported positions match what an editor shows.
    for line_nr, line in enumerate(text.split("\n"), start=1):
        line = utils.sanitize_string(line, comment_starts=['#', ';'])
        if not line:
            # Nothing to match on; skip before doing any regex work.
            continue

        section_match = re.match(r'^\[([-.a-zA-Z0-9_]*)\]$', line)
        if section_match is not None:
            current_section = section_match.group(1)
            continue

        keyvalue_match = re.match(r'^([-.a-zA-Z0-9_]+)\s*=\s*(.*)$', line)
        if keyvalue_match is None:
            raise SyntaxError("No Section or key-value line at %i" % line_nr)

        section = sections.setdefault(current_section, [])
        section.append((keyvalue_match.group(1), keyvalue_match.group(2)))

    return sections
def __init__(self, master=None, **kwargs):
    """Build a bordered card showing an entity's picture and details.

    Args:
        master: Parent widget, forwarded to the base class.
        **kwargs: Options for the base class. Must contain ``entity``, which
            is removed and stored on ``self.entity``. ``borderwidth`` and
            ``relief`` are always overridden with ``2`` and ``"ridge"``.

    Raises:
        KeyError: If ``entity`` is not supplied.
    """
    # "entity" is our own option; take it out before the base class sees it.
    self.entity = kwargs.pop("entity")

    kwargs.update(borderwidth=2, relief="ridge")
    super().__init__(master, **kwargs)

    # Left cell: the (initially empty) profile picture.
    self.profile_picture = Label(self)
    self.profile_picture.grid(row=0, column=0, sticky=NSEW)

    # Right cell: a column of text labels.
    self.right_column = Frame(self, padding=(16, 0))
    self.right_column.grid(row=0, column=1)
    column = self.right_column

    self.name_label = Label(
        column,
        text=sanitize_string(get_display_name(self.entity)),
        font="-weight bold -size 14",
    )
    self.name_label.grid(row=0, sticky=NW)

    if hasattr(self.entity, "username"):
        handle = "@{}".format(self.entity.username)
        self.username_label = Label(column, text=handle, font="-size 12")
        self.username_label.grid(row=1, sticky=NW)

    # Row 2 holds either the phone number or, failing that, the member count.
    if hasattr(self.entity, "phone"):
        phone = "+{}".format(self.entity.phone)
        self.phone_label = Label(column, text=phone)
        self.phone_label.grid(row=2, sticky=NW)
    elif hasattr(self.entity, "participants_count"):
        members = "{} participants".format(self.entity.participants_count)
        self.participants_label = Label(column, text=members)
        self.participants_label.grid(row=2, sticky=NW)
def location_element_to_dict(location_element, gis_data):
    """Turn one listing element into a flat location dictionary.

    Args:
        location_element: Element exposing ``cssselect``; must contain a
            descendant carrying a ``data-analytics-pin`` attribute with JSON.
        gis_data: Passed through unchanged to ``get_coordinates_for``.

    Returns:
        Dict with the keys ``name``, ``street_address``, ``locality``,
        ``address_region``, ``postal_code``, ``phone_number``,
        ``external_web_url``, ``external_id``, ``latitude``, ``longitude``
        and ``address``. Text fields missing from the element are ``''``;
        the coordinates are ``None`` when no match is found.
    """
    selectors = {
        'name': '.listing__name a',
        'street_address': '[itemprop="streetAddress"]',
        'locality': '[itemprop="addressLocality"]',
        'address_region': '[itemprop="addressRegion"]',
        'postal_code': '[itemprop="postalCode"]',
        'phone_number': '.jsMapBubblePhone h4',
    }

    result = {}
    for field, selector in selectors.items():
        matches = location_element.cssselect(selector)
        # Only the first matching node contributes its text.
        result[field] = matches[0].text_content() if matches else ''

    result['external_web_url'] = get_web_url_from(location_element)
    result['name'] = utils.sanitize_string(result['name'])

    # The analytics pin is a JSON blob stored in an attribute.
    pin_element = location_element.cssselect('[data-analytics-pin]')[0]
    pin_json = pin_element.xpath('@data-analytics-pin')[0]
    analytics_pin = json.loads(pin_json)
    result['external_id'] = analytics_pin['lk_listing_id']

    coordinates = get_coordinates_for(
        gis_data,
        result['external_id'],
        analytics_pin['lk_pos_num'],
        result['name'],
    )
    if coordinates is None:
        latitude, longitude = None, None
    else:
        latitude, longitude = coordinates[0], coordinates[1]
    result['latitude'] = latitude
    result['longitude'] = longitude

    # Built last because it reads the fields collected above.
    result['address'] = get_full_address_from(result)
    return result
def get_location_dict_from_item(item_element):
    """Build a location dictionary from one feed ``item`` element.

    Args:
        item_element: Element exposing ``xpath`` and ``findtext``; must
            contain a ``georss:point`` plus ``title``, ``description`` and
            ``link`` children.

    Returns:
        Dict with blank ``street_address``, ``locality``, ``address_region``
        and ``postal_code``, plus ``latitude``, ``longitude``, ``name``,
        ``external_id`` and ``phone_number``. Coordinates are ``None`` when
        the point does not split into exactly two parts. ``phone_number`` is
        ``''`` unless the last word of the description yields more than
        seven digits.
    """
    result = dict.fromkeys(
        ['street_address', 'locality', 'address_region', 'postal_code'], '')

    namespaces = {'georss': 'http://www.georss.org/georss'}
    point = item_element.xpath('.//georss:point', namespaces=namespaces)[0]
    # NOTE(review): the string form of the xpath result is what gets split
    # here. If the result is an element object rather than text, this will
    # not be the "lat lon" content -- confirm against real feed data.
    coords_s = str(point)
    pieces = coords_s.split(' ')
    if len(pieces) != 2:
        print('coords length is expected to be 2 but is %d, coords = %s' %
              (len(pieces), coords_s))
        latitude = longitude = None
    else:
        latitude = float(pieces[0])
        longitude = float(pieces[1])
    result['latitude'] = latitude
    result['longitude'] = longitude

    title = item_element.findtext('.//title').strip()
    result['name'] = utils.sanitize_string(title)

    address_str = item_element.findtext('.//description').strip()
    link = item_element.findtext('.//link').strip()
    result['external_id'] = get_id_from_link_url(link)

    # str.split(' ') always yields at least one piece, so [-1] is safe.
    digits = utils.get_digits(address_str.split(' ')[-1])
    result['phone_number'] = digits if len(digits) > 7 else ''
    return result
def location_element_to_dict(location_element, gis_data):
    """Extract name, address parts, phone, id and coordinates of a listing.

    Args:
        location_element: Element exposing ``cssselect``; must contain a
            descendant carrying a ``data-analytics-pin`` attribute with JSON.
        gis_data: Handed as-is to ``get_coordinates_for``.

    Returns:
        Dict with the text fields (``''`` when the selector matches
        nothing), ``external_web_url``, ``external_id``, ``latitude`` and
        ``longitude`` (both ``None`` without coordinates) and ``address``.
    """
    def first_text(selector):
        # Text of the first node matching *selector*, or '' if none match.
        found = location_element.cssselect(selector)
        if len(found) == 0:
            return ''
        return found[0].text_content()

    result = {
        'name': first_text('.listing__name a'),
        'street_address': first_text('[itemprop="streetAddress"]'),
        'locality': first_text('[itemprop="addressLocality"]'),
        'address_region': first_text('[itemprop="addressRegion"]'),
        'postal_code': first_text('[itemprop="postalCode"]'),
        'phone_number': first_text('.jsMapBubblePhone h4'),
    }
    result['external_web_url'] = get_web_url_from(location_element)
    result['name'] = utils.sanitize_string(result['name'])

    # The attribute value is JSON describing the map pin.
    pin_holder = location_element.cssselect('[data-analytics-pin]')[0]
    analytics_pin = json.loads(pin_holder.xpath('@data-analytics-pin')[0])
    listing_id = analytics_pin['lk_listing_id']
    result['external_id'] = listing_id

    coordinates = get_coordinates_for(
        gis_data, listing_id, analytics_pin['lk_pos_num'], result['name'])
    if coordinates is None:
        result['latitude'] = None
        result['longitude'] = None
    else:
        result['latitude'] = coordinates[0]
        result['longitude'] = coordinates[1]

    # Needs the address fields gathered above, so it comes last.
    result['address'] = get_full_address_from(result)
    return result
def process_raw_title(self):
    """Split ``self.raw_title`` into a link and a title.

    On success the captured ``href`` is appended to ``self.link`` and the
    anchor text, passed through ``sanitize_string``, is stored in
    ``self.title``. On failure a warning is logged and nothing is changed.

    Returns:
        The regex match object, or ``None`` when ``raw_title`` does not
        contain the expected anchor markup.
    """
    match = re.search(r'<a style="" href="(.*?)">(.*)<\/a>', self.raw_title)
    if not match:
        logging.warning(f'Failed to extract {self.raw_title} title!')
        return match

    href, text = match.group(1), match.group(2)
    self.link += href
    self.title = sanitize_string(text)
    return match
def search_mentions(self, msg):
    """Answer *msg* with a random audio phrase if it mentions a watched word.

    The message text is passed through ``sanitize_string`` and split on
    whitespace; if none of the resulting words is in ``self.mention_words``
    the method returns without doing anything.
    """
    words = set(sanitize_string(msg.text).split())
    hits = self.mention_words.intersection(words)
    if len(hits) == 0:
        return

    logger.info("mention: responding to user %s" % user_log_string(msg))
    # The dataset receives every word of the message, not only the hits.
    audio, duration = self.dataset.random_phrase(words)
    self.send_audio(msg.chat.id, audio, duration)
def parse_config(config_path, catalogue):
    """Read section *catalogue* of the file at *config_path* into globals.

    Assigns ``cluster``, ``environment``, ``heartbeat_interval``,
    ``heartbeat_timeout`` and ``reconnect_interval`` (the last three as
    ``timedelta`` built from a whole number of seconds). The ``listen`` and
    ``connect`` options are split on single spaces and each piece is fed
    through ``parse_address`` into the existing ``listen`` / ``connect``
    globals via ``update``. Both collections are printed at the end.

    Every option value goes through ``utils.sanitize_string`` first.
    """
    global address, cluster, environment, listen, connect
    global heartbeat_interval, heartbeat_timeout, reconnect_interval

    parser = configparser.RawConfigParser()
    parser.read(config_path)

    def option(name):
        # Sanitized raw value of one option in the selected section.
        return utils.sanitize_string(parser.get(catalogue, name))

    def seconds(name):
        # Option interpreted as an integer number of seconds.
        return timedelta(seconds=int(option(name)))

    cluster = option("cluster")
    environment = option("environment")
    heartbeat_interval = seconds("heartbeat_interval")
    heartbeat_timeout = seconds("heartbeat_timeout")
    reconnect_interval = seconds("reconnect_interval")

    # Both lists are read before either global is touched.
    listen_list = option("listen")
    connect_list = option("connect")

    for entry in listen_list.split(" "):
        listen.update(parse_address(entry))
    for entry in connect_list.split(" "):
        connect.update(parse_address(entry))

    print("listen:", listen)
    print("connect:", connect)
def build_sub_category_by_title(category_code, sub_category_title, lang):
    """Return the mapped teasers of the sub-category with the given title.

    The category is fetched with ``api.category``; the sub-category is the
    entry ``hof.find`` returns for the predicate "sanitized ``title`` equals
    *sub_category_title*".

    Returns:
        List with ``mapper.map_generic_item`` applied to each teaser.
    """
    def has_requested_title(entry):
        return utils.sanitize_string(entry.get('title')) == sub_category_title

    category = api.category(category_code, lang)
    sub_category = hof.find(has_requested_title, category)

    mapped = []
    for teaser in sub_category.get('teasers'):
        mapped.append(mapper.map_generic_item(teaser))
    return mapped
def update_conversation_list(self):
    """Refill the conversation list with the entities matching the search.

    The list box is cleared, then every entity in ``self.entities`` whose
    sanitized display name contains the search box text (compared in lower
    case) is appended in order.
    """
    needle = self.search_box.get().lower()
    self.conversation_list.delete(0, END)

    names = (sanitize_string(get_display_name(e)) for e in self.entities)
    for name in names:
        if needle not in name.lower():
            continue
        self.conversation_list.insert(END, name)
def parse_options(args):
    """Parse options.

    Stores the outcome in the module globals ``show_usage``, ``verbose``,
    ``foreground``, ``config_path`` and ``catalogue``.

    Args:
        args: Command-line arguments, without the program name.

    Behaviour:
        * An option error is printed to stderr, followed by ``usage(2)``.
        * ``-V`` / ``--version`` prints ``version`` and exits with status 0.
        * The first positional argument is the mandatory catalogue name; if
          ``catalogue`` is still ``None`` afterwards an error is printed and
          ``usage(2)`` is called.
        * If help was requested, ``usage(0)`` is called last.
    """
    global show_usage
    global catalogue
    global version
    global verbose
    # Without this declaration the -f/--foreground assignment below would
    # only bind a local variable and the flag would have no effect.
    global foreground
    global config_path

    try:
        opts, args = getopt.getopt(args, OPTIONS, LONG_OPTIONS)
    except getopt.GetoptError as e:
        print(e, file=sys.stderr)
        # NOTE(review): usage() is presumably terminating the process; if it
        # ever returns, ``opts`` is unbound below -- confirm.
        usage(2)

    for o, a in opts:
        if o in ("-h", "--help"):
            show_usage = True
        elif o in ("-V", "--version"):
            print(version, file=sys.stdout)
            sys.exit(0)
        elif o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-f", "--foreground"):
            foreground = True
        elif o in ("-c", "--config-path"):
            config_path = os.path.expanduser(utils.sanitize_string(a))

    # Parse mandatory arguments.
    if args:
        catalogue = utils.sanitize_string(args[0])

    if catalogue is None:
        print("ERROR: Expected mandatory catalogue name as argument",
              file=sys.stderr)
        usage(2)

    if show_usage:
        usage(0)
def map_category_item(item, category_code):
    """Map a category entry to a ``{'label', 'path'}`` menu item.

    Entries with a truthy ``code`` are routed to ``sub_category_by_code``;
    all others to ``sub_category_by_title``, using the sanitized title and
    *category_code*.
    """
    title = item.get('title')
    code = item.get('code')

    if not code:
        # No usable code: fall back to looking the sub-category up by title.
        path = plugin.url_for(
            'sub_category_by_title',
            category_code=category_code,
            sub_category_title=utils.sanitize_string(title),
        )
    else:
        path = plugin.url_for('sub_category_by_code', sub_category_code=code)

    return {'label': title, 'path': path}
def map_category_item(item, category_code):
    """Map a category entry to a ``{'label', 'path'}`` menu item.

    Every entry is routed to ``sub_category_by_title`` using its sanitized
    title and *category_code*.
    """
    # NOTE: routing by the entry's ``code`` (``sub_category_by_code``) is
    # disabled here; only the title-based route is used.
    label = item.get('title')
    target = plugin.url_for(
        'sub_category_by_title',
        category_code=category_code,
        sub_category_title=utils.sanitize_string(label),
    )
    return dict(label=label, path=target)
def on_next(self, event=None):
    """Gets fired after the Next button is clicked.

    If an entry is selected in the conversation list, the first entity whose
    sanitized display name equals the selected text is looked up, this
    window's master is destroyed and a ``BackupWindow`` is started for that
    entity. Without a selection, or without a matching entity, nothing
    happens.

    Args:
        event: Unused; accepted so the method can serve as an event handler.
    """
    # Ensure the user has selected an entity
    selection = self.conversation_list.curselection()
    if not selection:
        return

    value = self.conversation_list.get(selection[0])

    # Search for the matching entity (user or chat)
    # TODO Note that entities sharing the exact same display name cannot be
    # told apart here; the first one in self.entities wins.
    for entity in self.entities:
        if sanitize_string(get_display_name(entity)) != value:
            continue

        self.master.destroy()
        # Import the window here to avoid cyclic dependencies
        from gui.windows import BackupWindow
        start_app(BackupWindow, entity=entity)
        # Stop after the first match: the master is already destroyed, so a
        # second same-named entity must not destroy and launch again.
        return
def get_series_episodes_by_name(self, series_id, sanitize_string):
    """Index a series' episodes by their sanitized episode name.

    Args:
        series_id: Passed to ``self.get_series_episodes``.
        sanitize_string: Mapping handed to ``utils.sanitize_string`` as its
            second argument, or ``None`` for an empty one. The entry
            ``'.' -> ''`` is always added. The caller's mapping itself is
            left unmodified.

    Returns:
        Dict mapping each sanitized episode name to
        ``{'season_number': ..., 'episode_number': ...}``. Episodes without
        a name are skipped; when several episodes share a sanitized name the
        first one wins.
    """
    # Work on a copy so the extra '.' rule never leaks into the caller's
    # mapping.
    # NOTE(review): this assumes the '.' rule is meant to apply even when a
    # mapping is supplied -- confirm.
    replacements = {} if sanitize_string is None else dict(sanitize_string)
    replacements['.'] = ''

    episodes_by_name = {}
    for episode in self.get_series_episodes(series_id):
        raw_name = episode['episodeName']
        if raw_name is None:
            continue

        episode_name = utils.sanitize_string(raw_name, replacements)
        if episode_name not in episodes_by_name:
            episodes_by_name[episode_name] = {
                'season_number': episode['airedSeason'],
                'episode_number': episode['airedEpisodeNumber'],
            }
    return episodes_by_name
def __init__(self, master=None, **args):
    """Create the backup window for one entity.

    Args:
        master: Parent widget, forwarded to the base class.
        **args: Must contain ``entity``, the conversation to back up.

    Stores the entity, its sanitized display name, the cached client and a
    ``Backuper`` wired to ``self.on_metadata_change``; sets the window title,
    packs the frame, builds the widgets and starts ``self.dl_propic`` on a
    separate thread.
    """
    super().__init__(master)

    # Save our entity and its display
    entity = args['entity']
    self.entity = entity
    self.display = sanitize_string(get_display_name(entity))

    # Get a cached client and initialize a backuper instance with it
    client = get_cached_client()
    self.client = client
    backuper = Backuper(client, entity)
    self.backuper = backuper
    backuper.on_metadata_change = self.on_metadata_change

    # Set up the frame itself
    window_title = 'Backup with {}'.format(self.display)
    self.master.title(window_title)
    self.pack(padx=16, pady=16)
    self.create_widgets()

    # Download the profile picture in a different thread
    downloader = Thread(target=self.dl_propic)
    downloader.start()
def __init__(self, master=None, **kwargs):
    """Create the entity card widget (picture left, text labels right).

    Args:
        master: Parent widget, forwarded to the base class.
        **kwargs: Base-class options plus the required ``entity``, which is
            popped and kept as ``self.entity``. ``borderwidth`` is forced to
            ``2`` and ``relief`` to ``'ridge'``.

    Raises:
        KeyError: If ``entity`` is missing from *kwargs*.
    """
    # Set the custom attributes and pop'em out
    self.entity = kwargs.pop('entity')

    # Initialize the frame
    kwargs['borderwidth'] = 2
    kwargs['relief'] = 'ridge'
    super().__init__(master, **kwargs)

    # Set up our custom widget
    picture = Label(self)
    picture.grid(row=0, column=0, sticky=NSEW)
    self.profile_picture = picture

    text_column = Frame(self, padding=(16, 0))
    text_column.grid(row=0, column=1)
    self.right_column = text_column

    display_name = sanitize_string(get_display_name(self.entity))
    self.name_label = Label(text_column, text=display_name,
                            font='-weight bold -size 14')
    self.name_label.grid(row=0, sticky=NW)

    if hasattr(self.entity, 'username'):
        self.username_label = Label(
            text_column,
            text='@{}'.format(self.entity.username),
            font='-size 12',
        )
        self.username_label.grid(row=1, sticky=NW)

    # The third row shows the phone if the entity has one, otherwise the
    # participant count if that is available.
    if hasattr(self.entity, 'phone'):
        self.phone_label = Label(
            text_column, text='+{}'.format(self.entity.phone))
        self.phone_label.grid(row=2, sticky=NW)
    elif hasattr(self.entity, 'participants_count'):
        self.participants_label = Label(
            text_column,
            text='{} participants'.format(self.entity.participants_count),
        )
        self.participants_label.grid(row=2, sticky=NW)
def read_config():
    """Load the rename and segment options from ``config/config.json``.

    ``RENAME`` and ``SEGMENT`` are read from the ``"configuration"`` object
    of the file into the module globals of the same name. Afterwards both
    values are passed together, as a list, through ``utils.sanitize_string``
    and the results are stored back.

    A ``ValueError`` while reading (e.g. malformed JSON) is ignored, in which
    case the globals keep whatever values they had before the call.
    """
    global RENAME, SEGMENT

    try:
        # The context manager closes the file on success and on error alike.
        with open(opj('config/config.json')) as config_file:
            data = json.load(config_file)

        # rename option
        RENAME = data["configuration"]["RENAME"]
        # segment option
        SEGMENT = data["configuration"]["SEGMENT"]
    except ValueError:
        # Deliberate best effort: an unparsable file leaves the current
        # values untouched.
        pass

    var = [RENAME, SEGMENT]
    # Trailing extra whitespaces
    RENAME, SEGMENT = utils.sanitize_string(var)
def sanitize(title,desc,**kw):
    """Format, truncate and sanitize a title/description pair.

    ``arg['title_fmt']`` and ``arg['desc_fmt']`` are %-formatted with a
    mapping holding ``title``, ``desc`` and any extra keyword arguments. The
    title is truncated with ``truncate_string(..., 80)`` and the description
    with ``truncate_string(..., 2000)`` before each goes through
    ``sanitize_string``.

    Returns:
        Tuple ``(title, description)`` of the processed strings.
    """
    blocks = {'title': title, 'desc': desc}
    blocks.update(kw)

    formatted_title = arg['title_fmt'] % blocks
    clean_title = sanitize_string(truncate_string(formatted_title, 80))

    formatted_desc = arg['desc_fmt'] % blocks
    clean_desc = sanitize_string(truncate_string(formatted_desc, 2000))

    return clean_title, clean_desc
def test_sanitize_string_unicode():
    """Make sure unicode gets cleaned properly"""
    # The backslash is written as ``\\`` so the literal no longer relies on
    # the invalid escape sequence ``\i``; the string value is unchanged.
    u = u"{Mart{'{\\i}}nez-Pinedo}"
    assert utils.sanitize_string(u) == "MartinezPinedo"
def test_sanitize_string_string():
    """Make sure strings get cleaned properly"""
    # The backslash is written as ``\\`` so the literal no longer relies on
    # the invalid escape sequence ``\i``; the string value is unchanged.
    s = "{Mart{'{\\i}}nez-Pinedo}"
    assert utils.sanitize_string(s) == "MartinezPinedo"
def build_sub_category_by_title(category_code, sub_category_title, lang):
    """Look up a sub-category by sanitized title and map its teasers.

    Args:
        category_code: Category passed to ``api.category``.
        sub_category_title: Title to compare against each entry's sanitized
            ``title``.
        lang: Language passed to ``api.category``.

    Returns:
        List of ``mapper.map_generic_item`` results, one per teaser of the
        entry selected by ``hof.find``.
    """
    entries = api.category(category_code, lang)
    chosen = hof.find(
        lambda entry: sub_category_title == utils.sanitize_string(
            entry.get('title')),
        entries,
    )
    teasers = chosen.get('teasers')
    return list(map(mapper.map_generic_item, teasers))
def data(self):
    """Collect the paper's file locations and bibliographic fields.

    Returns:
        Dict with ``path``, ``pdffile``, ``bibfile``, ``otherfiles``,
        ``bibkey``, ``title``, ``authors``, ``1au`` (first author),
        ``journal``, ``abstract``, ``year``, ``month`` and ``keywords``.
        Missing optional fields are ``None``, except ``month`` which is
        ``''``.

    Raises:
        IndexError: If the BibTeX entry has no title, an empty title, or no
            authors.
    """
    def optional_field(name, default=None):
        # Value of a BibTeX field, or *default* when the entry lacks it.
        try:
            return self.bibdata.fields[name]
        except KeyError:
            return default

    # paths and file data, then information about the paper itself
    data = {
        'path': self.path,
        'pdffile': self.pdffile,
        'bibfile': self.bibfile,
        'otherfiles': self.otherfiles,
        'bibkey': self.bibkey,
    }

    # the BibTeX entry must have a title
    try:
        title = self.bibdata.fields['title'].strip('{}')
    except KeyError:
        raise IndexError(self.path, 'BibTeX file does not contain a title')
    # don't allow the empty string
    if not title:
        raise IndexError(self.path, 'BibTeX file does not contain a title')
    data['title'] = title

    # the BibTeX entry must have authors
    try:
        authors = self.bibdata.persons['author']
    except KeyError:
        raise IndexError(self.path, 'BibTeX file does not contain authors')
    data['authors'] = [
        utils.sanitize_string(unicode(person), exceptions=".,- ")
        for person in authors
    ]

    # the first author is special
    first_author = data['authors'][0]
    data['1au'] = utils.sanitize_string(first_author, exceptions=".,- ")

    # the journal (canonicalised even when absent)
    data['journal'] = utils.canonical_journal(optional_field('journal'))

    # remaining optional fields
    data['abstract'] = optional_field('abstract')
    data['year'] = optional_field('year')
    data['month'] = optional_field('month', '')
    data['keywords'] = optional_field('keywords')
    return data