def __init__(self, title_html):
     """Initialise a book from the URL of its chapter index page.

     Args:
         title_html: URL of the form
             ``https://m.wuxiaworld.co/<slug>/all.html``. The whole value is
             kept as ``index_iri``; the ``<slug>`` part is passed through
             ``sanitize_string`` and stored as ``title``.

     Raises:
         ValueError: if ``title_html`` does not contain such a URL.
     """
     self.index_iri = title_html
     self.chapter_list = []
     # The dots are escaped so that they only match a literal '.' and not
     # any character.
     match = re.search(r'https://m\.wuxiaworld\.co/(.*)/all\.html',
                       title_html)
     if match is None:
         # Fail with a readable message instead of an AttributeError raised
         # by calling .group() on None.
         raise ValueError(
             'Not a wuxiaworld.co index URL: {!r}'.format(title_html))
     self.title = sanitize_string(match.group(1))
     # Send log records of DEBUG level and above to book.log.
     logging.basicConfig(filename='book.log', level=logging.DEBUG)
def read_config():
    '''
    Function to read configurations from config file.

    Loads ``config/config.json`` (path built with ``opj``) and copies the
    "PATH", "FILTER", "OPTION", "RENAME" and "SEGMENT" entries of its
    "configuration" object into the module globals DEFAULT_DIR, FILTERS,
    OPTION_SELECTED, RADIO_SELECTED and SEGMENT_SELECTED. The five globals
    are then passed through ``utils.sanitize_string`` as one list.

    A file that cannot be decoded as JSON is ignored on purpose; the
    globals then keep the values they already had.
    '''
    global DEFAULT_DIR, FILTERS, OPTION_SELECTED, RADIO_SELECTED, SEGMENT_SELECTED
    try:
        # The context manager closes the file even when json.load() raises.
        with open(opj('config/config.json')) as config_file:
            data = json.load(config_file)
    except ValueError:
        # Best effort: keep the current settings when the file is not JSON.
        pass
    else:
        settings = data["configuration"]

        #default_dir
        DEFAULT_DIR = settings["PATH"]

        #filter
        FILTERS = settings["FILTER"]

        #history_option
        OPTION_SELECTED = settings["OPTION"]

        #rename option
        RADIO_SELECTED = settings["RENAME"]

        #segment option
        SEGMENT_SELECTED = settings["SEGMENT"]

    #Trailing extra whitespaces
    # NOTE(review): sanitize_string is given a list here and is expected to
    # return one cleaned value per element -- confirm against utils.
    var = [DEFAULT_DIR, FILTERS, OPTION_SELECTED, RADIO_SELECTED, SEGMENT_SELECTED]
    DEFAULT_DIR, FILTERS, OPTION_SELECTED, RADIO_SELECTED, SEGMENT_SELECTED = utils.sanitize_string(var)
Example #3
0
def parse(text):
    """Parse configuration file.

    Every line is first cleaned with ``utils.sanitize_string`` (``#`` and
    ``;`` are passed as comment starters). A ``[name]`` line selects the
    section that following ``key = value`` lines are appended to; lines
    before the first header go to ``'DEFAULT'``. The result maps section
    names to lists of ``(key, value)`` tuples. A section other than
    ``'DEFAULT'`` only appears once it has received a key. Any other
    non-empty line raises ``SyntaxError`` carrying the zero-based line
    index.

    >>> parse(EXAMPLE_TEXT)
    {'DEFAULT': [('foo', 'no')], 'hi': [('foo', '1'), ('bar', '2'), ('baz', '3')], 'hello': [('foo', '1'), ('foo', '${hi:bar}'), ('bar', 'baz')]}

    """
    header_pattern = r'^\[([-.a-zA-Z0-9_]*)\]$'
    pair_pattern = r'^([-.a-zA-Z0-9_]+)\s*=\s*(.*)$'

    result = {'DEFAULT': []}
    active = 'DEFAULT'
    for index, raw_line in enumerate(text.split("\n")):
        cleaned = utils.sanitize_string(raw_line, comment_starts=['#', ';'])
        header = re.match(header_pattern, cleaned)
        pair = re.match(pair_pattern, cleaned)

        # Blank (or comment-only) lines carry no information.
        if not cleaned:
            continue

        if header is not None:
            active = header.group(1)
            continue

        if pair is None:
            raise SyntaxError("No Section or key-value line at %i" % index)

        key, value = pair.groups()
        result.setdefault(active, []).append((key, value))

    return result
Example #4
0
    def __init__(self, master=None, **kwargs):
        """Build a ridge-bordered frame that summarises one entity.

        The mandatory ``entity`` keyword is removed from ``kwargs``; the
        remaining options, with ``borderwidth`` and ``relief`` overridden,
        are forwarded to the parent constructor. The frame holds a picture
        label on the left and, on the right, the display name plus
        optional username and phone/participant-count labels.
        """
        # "entity" is our own attribute: take it out before the parent
        # class sees the keyword arguments.
        self.entity = kwargs.pop("entity")

        kwargs.update(borderwidth=2, relief="ridge")
        super().__init__(master, **kwargs)

        # Left cell: placeholder label for the profile picture.
        self.profile_picture = Label(self)
        self.profile_picture.grid(row=0, column=0, sticky=NSEW)

        # Right cell: a column of text labels.
        self.right_column = Frame(self, padding=(16, 0))
        self.right_column.grid(row=0, column=1)

        display_name = sanitize_string(get_display_name(self.entity))
        self.name_label = Label(self.right_column, text=display_name, font="-weight bold -size 14")
        self.name_label.grid(row=0, sticky=NW)

        if hasattr(self.entity, "username"):
            handle = f"@{self.entity.username}"
            self.username_label = Label(self.right_column, text=handle, font="-size 12")
            self.username_label.grid(row=1, sticky=NW)

        # Row 2 shows the phone number when there is one, otherwise the
        # participant count when that attribute exists.
        if hasattr(self.entity, "phone"):
            phone_text = f"+{self.entity.phone}"
            self.phone_label = Label(self.right_column, text=phone_text)
            self.phone_label.grid(row=2, sticky=NW)

        elif hasattr(self.entity, "participants_count"):
            count_text = f"{self.entity.participants_count} participants"
            self.participants_label = Label(self.right_column, text=count_text)
            self.participants_label.grid(row=2, sticky=NW)
def location_element_to_dict(location_element, gis_data):
	"""Convert one listing element into a flat location dictionary.

	Text fields are taken from the first node matching each CSS selector
	(empty string when nothing matches). The listing id and position
	number come from the JSON stored in the ``data-analytics-pin``
	attribute, and are used together with the sanitized name to look up
	coordinates in ``gis_data``. Latitude and longitude are ``None`` when
	no coordinates are found.
	"""
	selectors = {
		'name': '.listing__name a',
		'street_address': '[itemprop="streetAddress"]',
		'locality': '[itemprop="addressLocality"]',
		'address_region': '[itemprop="addressRegion"]',
		'postal_code': '[itemprop="postalCode"]',
		'phone_number': '.jsMapBubblePhone h4'
	}

	result = {}
	for field, selector in selectors.items():
		matches = location_element.cssselect(selector)
		result[field] = matches[0].text_content() if len(matches) != 0 else ''

	result['external_web_url'] = get_web_url_from(location_element)
	result['name'] = utils.sanitize_string(result['name'])

	# The analytics pin attribute holds a JSON document describing the listing.
	pin_node = location_element.cssselect('[data-analytics-pin]')[0]
	analytics_pin = json.loads(pin_node.xpath('@data-analytics-pin')[0])
	result['external_id'] = analytics_pin['lk_listing_id']

	coordinates = get_coordinates_for(
		gis_data,
		result['external_id'],
		analytics_pin['lk_pos_num'],
		result['name'],
	)
	if coordinates is None:
		result['latitude'] = None
		result['longitude'] = None
	else:
		result['latitude'] = coordinates[0]
		result['longitude'] = coordinates[1]

	result['address'] = get_full_address_from(result)
	return result
def get_location_dict_from_item(item_element):
    """Build a location dictionary from one feed item element.

    The address fields are left blank. Coordinates are read from the
    item's first ``georss:point`` node as two space-separated numbers; a
    diagnostic is printed and both values are ``None`` when the text does
    not split into exactly two parts. The last space-separated token of
    the description is used as phone number when it yields more than
    seven digits.
    """
    result = dict.fromkeys(
        ['street_address', 'locality', 'address_region', 'postal_code'], '')

    namespaces = {'georss': 'http://www.georss.org/georss'}
    point = item_element.xpath('.//georss:point', namespaces=namespaces)[0]
    # NOTE(review): relies on str() of the node giving its text content --
    # confirm the element type used by the caller.
    point_text = str(point)
    pieces = point_text.split(' ')
    if len(pieces) != 2:
        print('coords length is expected to be 2 but is %d, coords = %s' %
              (len(pieces), point_text))
        result['latitude'] = None
        result['longitude'] = None
    else:
        result['latitude'] = float(pieces[0])
        result['longitude'] = float(pieces[1])

    title = item_element.findtext('.//title').strip()
    result['name'] = utils.sanitize_string(title)
    description = item_element.findtext('.//description').strip()
    link_url = item_element.findtext('.//link').strip()
    result['external_id'] = get_id_from_link_url(link_url)

    tokens = description.split(' ')
    result['phone_number'] = ''
    if len(tokens) != 0:
        digits = utils.get_digits(tokens[-1])
        if len(digits) > 7:
            result['phone_number'] = digits
    return result
Example #7
0
def location_element_to_dict(location_element, gis_data):
    """Convert one listing element into a flat location dictionary.

    Each text field comes from the first node matching its CSS selector,
    or is an empty string when the selector matches nothing. The
    ``data-analytics-pin`` attribute is decoded as JSON to obtain the
    listing id and position number, which are used with the sanitized
    name to look up coordinates in ``gis_data``. ``latitude`` and
    ``longitude`` are ``None`` when the lookup returns ``None``.
    """
    def first_text(selector):
        # Text of the first match, or '' when the selector finds nothing.
        found = location_element.cssselect(selector)
        if len(found) != 0:
            return found[0].text_content()
        return ''

    parts = {
        'name': '.listing__name a',
        'street_address': '[itemprop="streetAddress"]',
        'locality': '[itemprop="addressLocality"]',
        'address_region': '[itemprop="addressRegion"]',
        'postal_code': '[itemprop="postalCode"]',
        'phone_number': '.jsMapBubblePhone h4'
    }
    result = {}
    for key in parts:
        result[key] = first_text(parts[key])

    result['external_web_url'] = get_web_url_from(location_element)
    result['name'] = utils.sanitize_string(result['name'])

    pin_holder = location_element.cssselect('[data-analytics-pin]')[0]
    raw_pin = pin_holder.xpath('@data-analytics-pin')[0]
    analytics_pin = json.loads(raw_pin)
    result['external_id'] = analytics_pin['lk_listing_id']

    coordinates = get_coordinates_for(gis_data, result['external_id'],
                                      analytics_pin['lk_pos_num'],
                                      result['name'])
    if coordinates is None:
        result['latitude'] = None
        result['longitude'] = None
    else:
        result['latitude'] = coordinates[0]
        result['longitude'] = coordinates[1]

    result['address'] = get_full_address_from(result)
    return result
 def process_raw_title(self):
     """Extract the link target and title from the raw anchor markup.

     On success the captured href is appended to ``self.link`` and the
     sanitized anchor text becomes ``self.title``. A warning is logged
     when ``self.raw_title`` does not contain the expected anchor.

     Returns:
         The regex match object, or ``None`` when nothing matched.
     """
     found = re.search(r'<a style="" href="(.*?)">(.*)<\/a>', self.raw_title)
     if not found:
         logging.warning(f'Failed to extract {self.raw_title} title!')
         return found

     href, label = found.group(1), found.group(2)
     self.link += href
     self.title = sanitize_string(label)
     return found
Example #9
0
 def search_mentions(self, msg):
     """Answer ``msg`` with a random audio phrase if it mentions us.

     The sanitized message text is split into a set of words; nothing
     happens unless that set shares at least one word with
     ``self.mention_words``.
     """
     words = set(sanitize_string(msg.text).split())
     if self.mention_words.intersection(words):
         logger.info("mention: responding to user %s" % user_log_string(msg))
         audio, duration = self.dataset.random_phrase(words)
         self.send_audio(msg.chat.id, audio, duration)
Example #10
0
 def search_mentions(self, msg):
     """Reply with a random audio phrase when the message mentions us.

     The message text is sanitized and split on whitespace; the reply is
     only sent when one of the resulting words is in
     ``self.mention_words``.
     """
     plain_text = sanitize_string(msg.text)
     tokens = set(plain_text.split())
     matched = self.mention_words.intersection(tokens)
     if not matched:
         # No mention word in the message: stay silent.
         return

     logger.info("mention: responding to user %s" % user_log_string(msg))
     phrase = self.dataset.random_phrase(tokens)
     audio, duration = phrase
     self.send_audio(msg.chat.id, audio, duration)
Example #11
0
def parse_config(config_path, catalogue):
    """Load the ``catalogue`` section of a config file into module globals.

    Sets ``cluster``, ``environment`` and the three heartbeat/reconnect
    intervals (read as whole seconds and stored as ``timedelta``), then
    feeds every space-separated entry of the ``listen`` and ``connect``
    options through ``parse_address`` and merges the results into the
    global ``listen`` and ``connect`` mappings. Both mappings are printed
    at the end.
    """
    global address, cluster, environment, listen, connect
    global heartbeat_interval, heartbeat_timeout, reconnect_interval

    parser = configparser.RawConfigParser()
    parser.read(config_path)

    def option(name):
        # Sanitized raw value of one option in the requested section.
        return utils.sanitize_string(parser.get(catalogue, name))

    def seconds(name):
        # Option value interpreted as an integer number of seconds.
        return timedelta(seconds=int(option(name)))

    cluster = option("cluster")
    environment = option("environment")
    heartbeat_interval = seconds("heartbeat_interval")
    heartbeat_timeout = seconds("heartbeat_timeout")
    reconnect_interval = seconds("reconnect_interval")

    # Read both address lists before touching the globals, so a missing
    # option fails before any of them is updated.
    listen_spec = option("listen")
    connect_spec = option("connect")

    for entry in listen_spec.split(" "):
        listen.update(parse_address(entry))

    for entry in connect_spec.split(" "):
        connect.update(parse_address(entry))

    print("listen:", listen)
    print("connect:", connect)
def build_sub_category_by_title(category_code, sub_category_title, lang):
    """Return the mapped teasers of the sub-category with the given title.

    The category is fetched through ``api.category``; the sub-category is
    the entry that ``hof.find`` selects by comparing each entry's
    sanitized ``'title'`` with ``sub_category_title``.
    """
    def has_requested_title(entry):
        return utils.sanitize_string(entry.get('title')) == sub_category_title

    entries = api.category(category_code, lang)
    selected = hof.find(has_requested_title, entries)
    teasers = selected.get('teasers')

    return [mapper.map_generic_item(teaser) for teaser in teasers]
Example #13
0
    def update_conversation_list(self):
        """Rebuild the conversation list from the loaded entities.

        The list is emptied and refilled with the sanitized display name
        of every entity whose name contains the search box text; the
        comparison ignores case.
        """
        needle = self.search_box.get().lower()
        self.conversation_list.delete(0, END)

        # Lazy: each name is computed right before it is tested.
        names = (sanitize_string(get_display_name(entity))
                 for entity in self.entities)
        for name in names:
            if needle in name.lower():
                self.conversation_list.insert(END, name)
    def update_conversation_list(self):
        """Refresh the conversation list for the current search text.

        All entries are removed, then the sanitized display name of each
        loaded entity is appended when it contains the lower-cased search
        text (case-insensitive match).
        """
        query = self.search_box.get().lower()
        self.conversation_list.delete(0, END)

        for entity in self.entities:
            label = sanitize_string(get_display_name(entity))
            if query not in label.lower():
                continue
            self.conversation_list.insert(END, label)
Example #15
0
def parse_options(args):
    """Parse options.

    Parses ``args`` with ``getopt`` using the module's ``OPTIONS`` and
    ``LONG_OPTIONS`` and stores the outcome in module globals:
    ``show_usage``, ``verbose``, ``foreground``, ``config_path`` and
    ``catalogue`` (the first positional argument).

    ``--version`` prints the version and exits with status 0. An unknown
    option or a missing catalogue name prints an error to stderr and calls
    ``usage(2)``; ``--help`` calls ``usage(0)`` once parsing is complete.
    """

    global show_usage
    global catalogue
    global version
    global verbose
    global config_path
    # Without this declaration the assignment below would only create a
    # local variable and -f/--foreground would silently have no effect.
    global foreground

    try:
        opts, args = getopt.getopt(args, OPTIONS, LONG_OPTIONS)
    except getopt.GetoptError as e:
        print(e, file=sys.stderr)
        usage(2)

    for o, a in opts:
        if o in ("-h", "--help"):
            show_usage = True

        elif o in ("-V", "--version"):
            print(version, file=sys.stdout)
            sys.exit(0)

        elif o in ("-v", "--verbose"):
            verbose = True

        elif o in ("-f", "--foreground"):
            foreground = True

        elif o in ("-c", "--config-path"):
            config_path = os.path.expanduser(utils.sanitize_string(a))

    # Parse mandatory arguments.
    if len(args) > 0:
        catalogue = utils.sanitize_string(args[0])
    if catalogue is None:
        print("ERROR: Expected mandatory catalogue name as argument", file=sys.stderr)
        usage(2)

    # Usage is shown last so that the remaining options are parsed first.
    if show_usage:
        usage(0)
def map_category_item(item, category_code):
    """Map a category item to a ``{'label', 'path'}`` menu entry.

    Items with a truthy ``'code'`` are routed to ``sub_category_by_code``;
    all others are routed to ``sub_category_by_title`` using the sanitized
    title together with ``category_code``.
    """
    code = item.get('code')
    title = item.get('title')

    if not code:
        path = plugin.url_for(
            'sub_category_by_title',
            category_code=category_code,
            sub_category_title=utils.sanitize_string(title),
        )
    else:
        path = plugin.url_for('sub_category_by_code', sub_category_code=code)

    return dict(label=title, path=path)
def map_category_item(item, category_code):
    """Map a category item to a ``{'label', 'path'}`` menu entry.

    The path always targets ``sub_category_by_title`` with the sanitized
    title and ``category_code``.
    """
    title = item.get('title')

    # Routing by the item's 'code' (sub_category_by_code) is currently
    # switched off; every item is addressed by its title.
    target = plugin.url_for(
        'sub_category_by_title',
        category_code=category_code,
        sub_category_title=utils.sanitize_string(title),
    )

    return dict(label=title, path=target)
Example #18
0
    def on_next(self, event=None):
        """Gets fired after the Next button is clicked

        Looks up the entity whose sanitized display name equals the
        selected list entry, destroys this window's master and starts the
        backup window for that entity. Does nothing when no entry is
        selected or no entity matches.

        Args:
            event: unused; accepted so the method can be bound as an
                event handler.
        """
        # Ensure the user has selected an entity
        selection = self.conversation_list.curselection()
        if not selection:
            return

        value = self.conversation_list.get(selection[0])

        # Search for the matching entity (user or chat)
        # TODO Note that this will NOT work if they have the exact same name!
        for entity in self.entities:
            if sanitize_string(get_display_name(entity)) != value:
                continue

            self.master.destroy()
            # Import the window here to avoid cyclic dependencies
            from gui.windows import BackupWindow
            start_app(BackupWindow, entity=entity)
            # Stop at the first match: continuing would destroy the
            # already-destroyed master and start a second app for any
            # other entity that shares the same display name.
            break
Example #19
0
    def get_series_episodes_by_name(self, series_id, sanitize_string):
        """Index the episodes of a series by their sanitized name.

        Args:
            series_id: identifier passed to ``self.get_series_episodes``.
            sanitize_string: optional mapping handed to
                ``utils.sanitize_string`` as its second argument
                (presumably character replacements -- confirm against
                utils). May be ``None``. It is not modified; a ``'.'``
                to ``''`` entry is added to a private copy.

        Returns:
            dict mapping each sanitized episode name to a dict with the
            keys ``'season_number'`` and ``'episode_number'``. Episodes
            without a name are skipped, and the first episode wins when
            several share a sanitized name.
        """
        # Work on a copy so the caller's mapping does not silently gain
        # the '.' entry.
        replacements = dict(sanitize_string) if sanitize_string is not None else {}
        replacements['.'] = ''

        episodes_by_name = {}
        for episode in self.get_series_episodes(series_id):
            if episode['episodeName'] is None:
                continue
            episode_name = utils.sanitize_string(episode['episodeName'],
                                                 replacements)
            if episode_name not in episodes_by_name:
                episodes_by_name[episode_name] = {
                    'season_number': episode['airedSeason'],
                    'episode_number': episode['airedEpisodeNumber']
                }

        return episodes_by_name
    def on_next(self, event=None):
        """Gets fired after the Next button is clicked

        Finds the entity whose sanitized display name equals the selected
        list entry, destroys this window's master and starts the backup
        window for it. Nothing happens when there is no selection or no
        entity matches.

        Args:
            event: unused; accepted so the method can be bound as an
                event handler.
        """
        # Ensure the user has selected an entity
        selection = self.conversation_list.curselection()
        if selection:
            index = selection[0]
            value = self.conversation_list.get(index)

            # Search for the matching entity (user or chat)
            # TODO Note that this will NOT work if they have the exact same name!
            for entity in self.entities:
                display = sanitize_string(get_display_name(entity))
                if value == display:
                    self.master.destroy()
                    # Import the window here to avoid cyclic dependencies
                    from gui.windows import BackupWindow
                    start_app(BackupWindow, entity=entity)
                    # Only the first match is opened; without this the
                    # loop would go on and destroy the master again for
                    # another entity with the same display name.
                    break
Example #21
0
    def __init__(self, master=None, **args):
        """Create the backup window for the entity given as ``entity``.

        Stores the entity and its sanitized display name, wires a
        ``Backuper`` built on the cached client to this window's
        ``on_metadata_change`` handler, lays out the widgets and starts
        ``dl_propic`` on a separate thread.
        """
        super().__init__(master)

        # Save our entity and its display
        self.entity = args['entity']
        self.display = sanitize_string(get_display_name(self.entity))

        # Get a cached client and initialize a backuper instance with it
        self.client = get_cached_client()
        self.backuper = backuper = Backuper(self.client, self.entity)
        backuper.on_metadata_change = self.on_metadata_change

        # Set up the frame itself
        window_title = f'Backup with {self.display}'
        self.master.title(window_title)
        self.pack(padx=16, pady=16)
        self.create_widgets()

        # Download the profile picture in a different thread
        worker = Thread(target=self.dl_propic)
        worker.start()
Example #22
0
    def __init__(self, master=None, **kwargs):
        """Create a ridge-bordered card describing one entity.

        ``entity`` is a required keyword; it is removed from ``kwargs``
        and the rest, with ``borderwidth`` and ``relief`` overridden, goes
        to the parent constructor. The card shows a picture placeholder
        next to the display name, the username when the entity has one,
        and either the phone number or the participant count.
        """
        # Keep the entity for ourselves; the parent must not receive it.
        self.entity = kwargs.pop('entity')

        kwargs.update(borderwidth=2, relief='ridge')
        super().__init__(master, **kwargs)

        # Column 0: profile picture placeholder.
        self.profile_picture = Label(self)
        self.profile_picture.grid(row=0, column=0, sticky=NSEW)

        # Column 1: container for the text labels.
        self.right_column = Frame(self, padding=(16, 0))
        self.right_column.grid(row=0, column=1)

        shown_name = sanitize_string(get_display_name(self.entity))
        self.name_label = Label(self.right_column, text=shown_name,
                                font='-weight bold -size 14')
        self.name_label.grid(row=0, sticky=NW)

        if hasattr(self.entity, 'username'):
            username_text = f'@{self.entity.username}'
            self.username_label = Label(self.right_column,
                                        text=username_text,
                                        font='-size 12')
            self.username_label.grid(row=1, sticky=NW)

        # The third row is shared: phone number first, participant count
        # only when there is no phone attribute.
        if hasattr(self.entity, 'phone'):
            phone_text = f'+{self.entity.phone}'
            self.phone_label = Label(self.right_column, text=phone_text)
            self.phone_label.grid(row=2, sticky=NW)

        elif hasattr(self.entity, 'participants_count'):
            participants_text = f'{self.entity.participants_count} participants'
            self.participants_label = Label(self.right_column,
                                            text=participants_text)
            self.participants_label.grid(row=2, sticky=NW)
def read_config():
    '''
    function to read config_file for rename option

    Loads ``config/config.json`` (path built with ``opj``) and copies the
    "RENAME" and "SEGMENT" entries of its "configuration" object into the
    module globals RENAME and SEGMENT, then passes both through
    ``utils.sanitize_string`` as one list.

    A file that cannot be decoded as JSON is ignored on purpose; the
    globals then keep the values they already had.
    '''
    global RENAME, SEGMENT

    try:
        # The context manager makes sure the file is closed, including
        # when json.load() raises.
        with open(opj('config/config.json')) as config_file:
            data = json.load(config_file)
    except ValueError:
        # Best effort: keep the current settings when the file is not JSON.
        pass
    else:
        settings = data["configuration"]

        #rename option
        RENAME = settings["RENAME"]

        #segment option
        SEGMENT = settings["SEGMENT"]

    var = [RENAME, SEGMENT]

    #Trailing extra whitespaces
    # NOTE(review): sanitize_string is given a list here and is expected to
    # return one cleaned value per element -- confirm against utils.
    RENAME, SEGMENT = utils.sanitize_string(var)
Example #24
0
 def sanitize(title,desc,**kw):
     """Format, truncate and sanitize a title/description pair.

     ``arg['title_fmt']`` and ``arg['desc_fmt']`` are %-formatted with a
     mapping made of ``title``, ``desc`` and any extra keywords. The title
     is truncated with a limit of 80 and the description with a limit of
     2000 before each is passed through ``sanitize_string``.

     Returns:
         A ``(title, description)`` tuple.
     """
     blocks = {'title': title, 'desc': desc}
     blocks.update(kw)

     formatted_title = arg['title_fmt'] % blocks
     clean_title = sanitize_string(truncate_string(formatted_title, 80))

     formatted_desc = arg['desc_fmt'] % blocks
     clean_desc = sanitize_string(truncate_string(formatted_desc, 2000))

     return clean_title, clean_desc
Example #25
0
def test_sanitize_string_unicode():
    """Make sure unicode gets cleaned properly"""

    # The backslash before "i" is written as "\\i": a bare "\i" is an
    # invalid escape sequence that Python only tolerates with a warning.
    # The value of the string is unchanged.
    u = u"{Mart{'{\\i}}nez-Pinedo}"
    assert utils.sanitize_string(u) == "MartinezPinedo"
Example #26
0
def test_sanitize_string_string():
    """Make sure strings get cleaned properly"""

    # The backslash before "i" is written as "\\i": a bare "\i" is an
    # invalid escape sequence that Python only tolerates with a warning.
    # The value of the string is unchanged.
    s = "{Mart{'{\\i}}nez-Pinedo}"
    assert utils.sanitize_string(s) == "MartinezPinedo"
def build_sub_category_by_title(category_code, sub_category_title, lang):
    """Return the mapped teasers of the sub-category with the given title.

    ``api.category`` supplies the category; ``hof.find`` picks the entry
    whose sanitized ``'title'`` equals ``sub_category_title``, and each of
    its ``'teasers'`` is converted with ``mapper.map_generic_item``.
    """
    category = api.category(category_code, lang)

    def title_matches(candidate):
        sanitized = utils.sanitize_string(candidate.get('title'))
        return sanitized == sub_category_title

    sub_category = hof.find(title_matches, category)

    return list(map(mapper.map_generic_item, sub_category.get('teasers')))
Example #28
0
    def data(self):
        """Collect the paper's file paths and BibTeX metadata in a dict.

        Title and authors are mandatory; journal, abstract, year, month
        and keywords are optional and fall back to a default when absent
        (``''`` for the month, ``None`` otherwise, with the journal value
        always passed through ``utils.canonical_journal``).

        Raises:
            IndexError: if the BibTeX entry has no title, an empty title,
                or no authors.
        """
        def optional_field(name, default):
            # Value of a BibTeX field, or ``default`` when it is missing.
            try:
                return self.bibdata.fields[name]
            except KeyError:
                return default

        # paths, file data and the citation key
        data = {}
        for attribute in ('path', 'pdffile', 'bibfile', 'otherfiles',
                          'bibkey'):
            data[attribute] = getattr(self, attribute)

        # the BibTeX entry must have a non-empty title
        try:
            title = self.bibdata.fields['title'].strip('{}')
        except KeyError:
            raise IndexError(self.path, 'BibTeX file does not contain a title')
        if not title:
            raise IndexError(self.path,
                             'BibTeX file does not contain a title')
        data['title'] = title

        # the BibTeX entry must have authors
        try:
            authors = self.bibdata.persons['author']
        except KeyError:
            raise IndexError(self.path, 'BibTeX file does not contain authors')
        data['authors'] = [
            utils.sanitize_string(unicode(person), exceptions=".,- ")
            for person in authors
        ]

        # the first author is special
        first_author = data['authors'][0]
        data['1au'] = utils.sanitize_string(first_author, exceptions=".,- ")

        # the journal is stored in its canonical form
        data['journal'] = utils.canonical_journal(
            optional_field('journal', None))

        # remaining optional fields, each with its own default
        optional_defaults = (
            ('abstract', None),
            ('year', None),
            ('month', ''),
            ('keywords', None),
        )
        for name, default in optional_defaults:
            data[name] = optional_field(name, default)

        return data