Example no. 1
    def onMQTTPublish(self, topic, message):
        Domoticz.Debug("MQTT message: " + topic + " " + str(message))
        if str(topic) == 'domoticz/out':
            if message.get('idx') is not None:
                domoticz_id = str(message['idx'])
                if message.get('Type') is not None and (message['Type'] == 'Scene' or message['Type'] == 'Group'):
                    domoticz_id = 'group_' + domoticz_id
            else:
                return
            device = None
            if domoticz_id in self.domoticzDevicesById:
                device = self.domoticzDevicesById[domoticz_id]
            elif domoticz_id in self.linkedDevices and self.linkedDevices[domoticz_id] in self.domoticzDevicesById:
                # Get the device this message is linked to, for example device id 13 -> update device 12
                device = self.domoticzDevicesById[self.linkedDevices[domoticz_id]]
            if device is not None:
                adapter = getAdapter(device)
                if adapter is not None:
                    adapter.publishStateFromDomoticzTopic(self.mqttClient, device, self.base_topic, message)
                else:
                    Domoticz.Error('No adapter registered to publish state for device: %s' % str(device))

        else:
            if message == 'SYNC':
                self.syncDevices()
            elif topic.endswith('/set'):
                Domoticz.Debug('Published new state for device, topic: %s, state: %s' % (topic, message))
            else:
                match = re.search(self.base_topic + '/(.*)/(.*)', topic)

                if match:
                    device_id = match.group(1)
                    # Backwards compatibility: previously the device name was used as the topic name
                    if device_id in self.domoticzDevicesByName:
                        device = self.domoticzDevicesByName[device_id]
                    elif device_id in self.domoticzDevicesById:
                        device = self.domoticzDevicesById[device_id]
                    else:
                        Domoticz.Log('Received message for device which is not in Domoticz: %s, skipping' % device_id)
                        return
                    action = match.group(2)
                    adapter = getAdapter(device)
                    if adapter is not None:
                        adapter.handleMqttMessage(device, str(message), action, self.domoticz_port)
                        if not self.domoticz_mqtt_used:
                            adapter.publishState(self.mqttClient, device, self.base_topic, message) # answer directly
                    else:
                        Domoticz.Error('No adapter registered for action: %s for device: %s' % (action, str(device)))
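
A note on Example no. 1: the final branch routes topics of the form
<base_topic>/<device>/<action> with a two-group regex. Below is a minimal
sketch of just that routing step; the base_topic and topic values are
invented for illustration:

import re

base_topic = 'domoticz_mqtt'            # hypothetical base topic
topic = 'domoticz_mqtt/123/command'     # hypothetical incoming topic

match = re.search(base_topic + '/(.*)/(.*)', topic)
if match:
    device_id, action = match.group(1), match.group(2)
    print(device_id + ' ' + action)  # -> 123 command
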
Example no. 2
def get_urls_from_text(data,configuration=None,normalize=False):
    urls = collections.OrderedDict()
    data=unicode(data)

    if not configuration:
        configuration = Configuration(["test1.com"],"EPUB",lightweight=True)
    
    for href in re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', data):
        # this (should) catch normal story links, some javascript
        # 'are you old enough' links, and 'Report This' links.
        if 'story.php' in href:
            m = re.search(r"(?P<sid>(view)?story\.php\?(sid|psid|no|story|stid)=\d+)",href)
            if m is not None:
                href = form_url(href,m.group('sid'))
        try:
            href = href.replace('&index=1','')
            adapter = adapters.getAdapter(configuration,href)
            if adapter.story.getMetadata('storyUrl') not in urls:
                urls[adapter.story.getMetadata('storyUrl')] = [href]
            else:
                urls[adapter.story.getMetadata('storyUrl')].append(href)
        except Exception:
            pass

    # If not normalizing, simply return the longest URL on the assumption
    # that it contains the most user-readable metadata
    return urls.keys() if normalize else [max(value, key=len) for key, value in urls.items()]
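
The return logic above groups every original href under its normalized
storyUrl, then returns either the keys (normalize=True) or the longest
original href per story. A small self-contained sketch; the URLs and the
normalized key are invented for illustration:

import collections

urls = collections.OrderedDict()
story_url = 'http://test1.com/story.php?sid=1'   # hypothetical normalized storyUrl
for href in ['http://test1.com/story.php?sid=1',
             'http://test1.com/story.php?sid=1&chapter=2']:
    if story_url not in urls:
        urls[story_url] = [href]
    else:
        urls[story_url].append(href)

print(list(urls.keys()))                         # what normalize=True returns
print([max(v, key=len) for v in urls.values()])  # what normalize=False returns
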
Example no. 3
def get_urls_from_text(data, configuration=None, normalize=False):
    urls = collections.OrderedDict()
    data = unicode(data)

    if not configuration:
        configuration = Configuration("test1.com", "EPUB")

    for href in re.findall(
            r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
            data):
        # this (should) catch normal story links, some javascript
        # 'are you old enough' links, and 'Report This' links.
        if 'story.php' in href:
            m = re.search(
                r"(?P<sid>(view)?story\.php\?(sid|psid|no|story|stid)=\d+)",
                href)
            if m is not None:
                href = form_url(href, m.group('sid'))
        try:
            href = href.replace('&index=1', '')
            adapter = adapters.getAdapter(configuration, href)
            if adapter.story.getMetadata('storyUrl') not in urls:
                urls[adapter.story.getMetadata('storyUrl')] = [href]
            else:
                urls[adapter.story.getMetadata('storyUrl')].append(href)
        except Exception:
            pass

    # If not normalizing, simply return the longest URL on the assumption
    # that it contains the most user-readable metadata
    return urls.keys() if normalize else [
        max(value, key=len) for key, value in urls.items()
    ]
Example no. 4
def get_urls_from_text(data,configuration=None,normalize=False):

    normalized = [] # normalized url
    retlist = [] # orig urls.
    data=unicode(data)
    
    if not configuration:
        configuration = Configuration("test1.com","EPUB")
    
    for href in re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', data):
        # this (should) catch normal story links, some javascript
        # 'are you old enough' links, and 'Report This' links.
        # The 'normalized' set prevents duplicates.
        if 'story.php' in href:
            m = re.search(r"(?P<sid>(view)?story\.php\?(sid|psid|no|story|stid)=\d+)",href)
            if m is not None:
                href = form_url(href,m.group('sid'))
        try:
            href = href.replace('&index=1','')
            adapter = adapters.getAdapter(configuration,href)
            if adapter.story.getMetadata('storyUrl') not in normalized:
                normalized.append(adapter.story.getMetadata('storyUrl'))
                retlist.append(href)
        except:
            pass

    if normalize:
        return normalized
    else:
        return retlist
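
The story.php handling shared by the variants above extracts a canonical
sid-style fragment and rebuilds the URL from it. A quick standalone check
of that regex; the href is a made-up example URL:

import re

href = 'http://test1.com/viewstory.php?sid=1234&index=1'
m = re.search(r"(?P<sid>(view)?story\.php\?(sid|psid|no|story|stid)=\d+)", href)
if m is not None:
    print(m.group('sid'))  # -> viewstory.php?sid=1234
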
Example no. 5
def get_urls_from_html(data,url=None,configuration=None,normalize=False,restrictsearch=None,email=False):
    urls = collections.OrderedDict()

    if not configuration:
        configuration = Configuration(["test1.com"],"EPUB",lightweight=True)

    ## soup and re-soup because BS4/html5lib is more forgiving of
    ## incorrectly nested tags that way.
    soup = BeautifulSoup(unicode(BeautifulSoup(data,"html5lib")),"html5lib")
    if restrictsearch:
        soup = soup.find(*restrictsearch)
        #logger.debug("restrict search:%s"%soup)

    for a in soup.findAll('a'):
        if a.has_attr('href'):
            #logger.debug("a['href']:%s"%a['href'])
            href = form_url(url,a['href'])
            #logger.debug("1 urlhref:%s"%href)
            href = cleanup_url(href,email)
            try:
                #logger.debug("2 urlhref:%s"%href)
                adapter = adapters.getAdapter(configuration,href)
                #logger.debug("found adapter")
                if adapter.story.getMetadata('storyUrl') not in urls:
                    urls[adapter.story.getMetadata('storyUrl')] = [href]
                else:
                    urls[adapter.story.getMetadata('storyUrl')].append(href)
            except Exception, e:
                #logger.debug(e)
                pass

    # If not normalizing, simply return the longest URL on the assumption
    # that it contains the most user-readable metadata
    return urls.keys() if normalize else [max(value, key=len) for key, value in urls.items()]
Example no. 6
def get_urls_from_text(data, configuration=None, normalize=False, email=False):
    urls = collections.OrderedDict()
    try:
        data = unicode(data)
    except UnicodeDecodeError:
        data = data.decode('utf8')  ## for when called outside calibre.

    if not configuration:
        configuration = Configuration(["test1.com"], "EPUB", lightweight=True)

    for href in re.findall(
            r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
            data):
        href = cleanup_url(href, email)
        try:
            adapter = adapters.getAdapter(configuration, href)
            if adapter.story.getMetadata('storyUrl') not in urls:
                urls[adapter.story.getMetadata('storyUrl')] = [href]
            else:
                urls[adapter.story.getMetadata('storyUrl')].append(href)
        except Exception:
            pass

    # If not normalizing, simply return the longest URL on the assumption
    # that it contains the most user-readable metadata
    return urls.keys() if normalize else [
        max(value, key=len) for key, value in urls.items()
    ]
Example no. 7
def get_urls_from_text(data, configuration=None, normalize=False):

    normalized = []  # normalized url
    retlist = []  # orig urls.
    data = unicode(data)

    if not configuration:
        configuration = Configuration("test1.com", "EPUB")

    for href in re.findall(
            r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
            data):
        # this (should) catch normal story links, some javascript
        # 'are you old enough' links, and 'Report This' links.
        # The 'normalized' set prevents duplicates.
        if 'story.php' in href:
            m = re.search(
                r"(?P<sid>(view)?story\.php\?(sid|psid|no|story|stid)=\d+)",
                href)
            if m is not None:
                href = form_url(href, m.group('sid'))
        try:
            href = href.replace('&index=1', '')
            adapter = adapters.getAdapter(configuration, href)
            if adapter.story.getMetadata('storyUrl') not in normalized:
                normalized.append(adapter.story.getMetadata('storyUrl'))
                retlist.append(href)
        except Exception:
            pass

    if normalize:
        return normalized
    else:
        return retlist
Example no. 8
def get_urls_from_html(data,
                       url=None,
                       configuration=None,
                       normalize=False,
                       restrictsearch=None,
                       email=False):
    urls = collections.OrderedDict()

    if not configuration:
        configuration = Configuration(["test1.com"], "EPUB", lightweight=True)

    ## soup and re-soup because BS4/html5lib is more forgiving of
    ## incorrectly nested tags that way.
    soup = BeautifulSoup(unicode(BeautifulSoup(data, "html5lib")), "html5lib")
    if restrictsearch:
        soup = soup.find(*restrictsearch)
        #logger.debug("restrict search:%s"%soup)

    for a in soup.findAll('a'):
        if a.has_attr('href'):
            #logger.debug("a['href']:%s"%a['href'])
            href = form_url(url, a['href'])
            #logger.debug("1 urlhref:%s"%href)
            href = cleanup_url(href, email)
            try:
                #logger.debug("2 urlhref:%s"%href)
                adapter = adapters.getAdapter(configuration, href)
                #logger.debug("found adapter")
                if adapter.story.getMetadata('storyUrl') not in urls:
                    urls[adapter.story.getMetadata('storyUrl')] = [href]
                else:
                    urls[adapter.story.getMetadata('storyUrl')].append(href)
            except Exception, e:
                #logger.debug(e)
                pass

    # If not normalizing, simply return the longest URL on the assumption
    # that it contains the most user-readable metadata
    return urls.keys() if normalize else [
        max(value, key=len) for key, value in urls.items()
    ]
Example no. 9
def get_urls_from_page(url, configuration=None, normalize=False):

    if not configuration:
        configuration = Configuration(["test1.com"], "EPUB", lightweight=True)

    data = None
    adapter = None
    try:
        adapter = adapters.getAdapter(configuration, url, anyurl=True)

        # special stuff to log into archiveofourown.org, if possible.
        # Unlike most sites, which show links to 'adult' stories but
        # protect them, AO3 doesn't show them at all when not logged in.
        # Only works with a saved user/pass--not going to prompt for a list.
        if 'archiveofourown.org' in url:
            if adapter.getConfig("username"):
                if adapter.getConfig("is_adult"):
                    if '?' in url:
                        addurl = "&view_adult=true"
                    else:
                        addurl = "?view_adult=true"
                else:
                    addurl = ""
                # just to get an authenticity_token.
                data = adapter._fetchUrl(url + addurl)
                # login the session.
                adapter.performLogin(url, data)
                # get the list page with logged in session.

        if 'fimfiction.net' in url and adapter.getConfig("is_adult"):
            data = adapter._fetchUrl(url)
            adapter.set_adult_cookie()

        if 'tthfanfic.org' in url and adapter.getConfig("is_adult"):
            ## A simple fetch works in testing, but the actual pages use a
            ## POST with a 'ctkn' value, so we do too.
            # adapter._fetchUrl("https://www.tthfanfic.org/setmaxrating.php?sitemaxrating=5")
            adapter.setSiteMaxRating(url)

        # this way it uses User-Agent or other special settings.
        data = adapter._fetchUrl(url, usecache=False)
    except UnknownSite:
        # no adapter with anyurl=True, must be a random site.
        opener = u2.build_opener(u2.HTTPCookieProcessor(), GZipProcessor())
        data = opener.open(url).read()

    # kludge because I don't see it on enough sites to be worth generalizing yet.
    restrictsearch = None
    if 'scarvesandcoffee.net' in url:
        restrictsearch = ('div', {'id': 'mainpage'})

    return get_urls_from_html(data, url, configuration, normalize,
                              restrictsearch)
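
The AO3 branch above appends view_adult=true with '&' or '?' depending on
whether the URL already carries a query string. A minimal sketch of that
rule; add_view_adult is a hypothetical helper, not part of the original
code:

def add_view_adult(url):
    if '?' in url:
        return url + "&view_adult=true"
    return url + "?view_adult=true"

print(add_view_adult("https://archiveofourown.org/works/123"))    # appends ?view_adult=true
print(add_view_adult("https://archiveofourown.org/works?page=2")) # appends &view_adult=true
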
Example no. 10
def get_urls_from_page(url,configuration=None,normalize=False):

    if not configuration:
        configuration = Configuration(["test1.com"],"EPUB",lightweight=True)

    data = None
    adapter = None
    try:
        adapter = adapters.getAdapter(configuration,url,anyurl=True)

        # special stuff to log into archiveofourown.org, if possible.
        # Unlike most sites, which show links to 'adult' stories but
        # protect them, AO3 doesn't show them at all when not logged in.
        # Only works with a saved user/pass--not going to prompt for a list.
        if 'archiveofourown.org' in url:
            if adapter.getConfig("username"):
                if adapter.getConfig("is_adult"):
                    if '?' in url:
                        addurl = "&view_adult=true"
                    else:
                        addurl = "?view_adult=true"
                else:
                    addurl=""
                # just to get an authenticity_token.
                data = adapter._fetchUrl(url+addurl)
                # login the session.
                adapter.performLogin(url,data)
                # get the list page with logged in session.

        if 'fimfiction.net' in url and adapter.getConfig("is_adult"):
            data = adapter._fetchUrl(url)
            adapter.set_adult_cookie()

        if 'tthfanfic.org' in url and adapter.getConfig("is_adult"):
            ## A simple fetch works in testing, but the actual pages use a
            ## POST with a 'ctkn' value, so we do too.
            # adapter._fetchUrl("https://www.tthfanfic.org/setmaxrating.php?sitemaxrating=5")
            adapter.setSiteMaxRating(url)

        # this way it uses User-Agent or other special settings.
        data = adapter._fetchUrl(url,usecache=False)
    except UnknownSite:
        # no adapter with anyurl=True, must be a random site.
        opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor())
        data = opener.open(url).read()

    # kludge because I don't see it on enough sites to be worth generalizing yet.
    restrictsearch=None
    if 'scarvesandcoffee.net' in url:
        restrictsearch=('div',{'id':'mainpage'})

    return get_urls_from_html(data,url,configuration,normalize,restrictsearch)
Example no. 11
def get_urls_from_html(data,
                       url=None,
                       configuration=None,
                       normalize=False,
                       restrictsearch=None):
    urls = collections.OrderedDict()

    if not configuration:
        configuration = Configuration(["test1.com"], "EPUB", lightweight=True)

    ## soup and re-soup because BS4/html5lib is more forgiving of
    ## incorrectly nested tags that way.
    soup = BeautifulSoup(unicode(BeautifulSoup(data, "html5lib")), "html5lib")
    if restrictsearch:
        soup = soup.find(*restrictsearch)
        #logger.debug("restrict search:%s"%soup)

    for a in soup.findAll('a'):
        if a.has_attr('href'):
            #logger.debug("a['href']:%s"%a['href'])
            href = form_url(url, a['href'])
            #logger.debug("1 urlhref:%s"%href)
            # this (should) catch normal story links, some javascript
            # 'are you old enough' links, and 'Report This' links.
            if 'story.php' in a['href']:
                #logger.debug("trying:%s"%a['href'])
                m = re.search(
                    r"(?P<sid>(view)?story\.php\?(sid|psid|no|story|stid)=\d+)",
                    a['href'])
                if m is not None:
                    href = form_url(a['href'] if '//' in a['href'] else url,
                                    m.group('sid'))

            try:
                href = href.replace('&index=1', '')
                #logger.debug("2 urlhref:%s"%href)
                adapter = adapters.getAdapter(configuration, href)
                #logger.debug("found adapter")
                if adapter.story.getMetadata('storyUrl') not in urls:
                    urls[adapter.story.getMetadata('storyUrl')] = [href]
                else:
                    urls[adapter.story.getMetadata('storyUrl')].append(href)
            except Exception, e:
                #logger.debug(e)
                pass

    # If not normalizing, simply return the longest URL on the assumption
    # that it contains the most user-readable metadata
    return urls.keys() if normalize else [
        max(value, key=len) for key, value in urls.items()
    ]
Example no. 12
def get_urls_from_html(data,
                       url=None,
                       configuration=None,
                       normalize=False,
                       restrictsearch=None):

    normalized = []  # normalized url
    retlist = []  # orig urls.

    if not configuration:
        configuration = Configuration("test1.com", "EPUB")

    soup = BeautifulSoup(data)
    if restrictsearch:
        soup = soup.find(*restrictsearch)
        #print("restrict search:%s"%soup)

    for a in soup.findAll('a'):
        if a.has_key('href'):
            #print("a['href']:%s"%a['href'])
            href = form_url(url, a['href'])
            #print("1 urlhref:%s"%href)
            # this (should) catch normal story links, some javascript
            # 'are you old enough' links, and 'Report This' links.
            # The 'normalized' set prevents duplicates.
            if 'story.php' in a['href']:
                #print("trying:%s"%a['href'])
                m = re.search(
                    r"(?P<sid>(view)?story\.php\?(sid|psid|no|story|stid)=\d+)",
                    a['href'])
                if m is not None:
                    href = form_url(a['href'] if '//' in a['href'] else url,
                                    m.group('sid'))

            try:
                href = href.replace('&index=1', '')
                #print("2 urlhref:%s"%href)
                adapter = adapters.getAdapter(configuration, href)
                #print("found adapter")
                if adapter.story.getMetadata('storyUrl') not in normalized:
                    normalized.append(adapter.story.getMetadata('storyUrl'))
                    retlist.append(href)
            except Exception, e:
                #print(e)
                pass

    if normalize:
        return normalized
    else:
        return retlist
Example no. 13
def get_urls_from_page(url, configuration=None, normalize=False):

    if not configuration:
        configuration = Configuration("test1.com", "EPUB")

    data = None
    adapter = None
    try:
        adapter = adapters.getAdapter(configuration, url, anyurl=True)

        # special stuff to log into archiveofourown.org, if possible.
        # Unlike most sites, which show links to 'adult' stories but
        # protect them, AO3 doesn't show them at all when not logged in.
        # Only works with a saved user/pass--not going to prompt for a list.
        if 'archiveofourown.org' in url:
            if adapter.getConfig("username"):
                if adapter.getConfig("is_adult"):
                    if '?' in url:
                        addurl = "&view_adult=true"
                    else:
                        addurl = "?view_adult=true"
                else:
                    addurl = ""
                # just to get an authenticity_token.
                data = adapter._fetchUrl(url + addurl)
                # login the session.
                adapter.performLogin(url, data)
                # get the list page with logged in session.

        # this way it uses User-Agent or other special settings.  Only AO3
        # is doing login.
        data = adapter._fetchUrl(url, usecache=False)
    except UnknownSite:
        # no adapter with anyurl=True, must be a random site.
        opener = u2.build_opener(u2.HTTPCookieProcessor(), GZipProcessor())
        data = opener.open(url).read()

    # kludge because I don't see it on enough sites to be worth generalizing yet.
    restrictsearch = None
    if 'scarvesandcoffee.net' in url:
        restrictsearch = ('div', {'id': 'mainpage'})

    return get_urls_from_html(data, url, configuration, normalize,
                              restrictsearch)
Example no. 14
def get_urls_from_page(url,configuration=None,normalize=False):

    if not configuration:
        configuration = Configuration("test1.com","EPUB")

    data = None
    adapter = None
    try:
        adapter = adapters.getAdapter(configuration,url,anyurl=True)
        
        # special stuff to log into archiveofourown.org, if possible.
        # Unlike most sites, which show links to 'adult' stories but
        # protect them, AO3 doesn't show them at all when not logged in.
        # Only works with a saved user/pass--not going to prompt for a list.
        if 'archiveofourown.org' in url:
            if adapter.getConfig("username"):
                if adapter.getConfig("is_adult"):
                    if '?' in url:
                        addurl = "&view_adult=true"
                    else:
                        addurl = "?view_adult=true"
                else:
                    addurl=""
                # just to get an authenticity_token.
                data = adapter._fetchUrl(url+addurl)
                # login the session.
                adapter.performLogin(url,data)
                # get the list page with logged in session.
    
        # this way it uses User-Agent or other special settings.  Only AO3
        # is doing login.
        data = adapter._fetchUrl(url,usecache=False)
    except UnknownSite:
        # no adapter with anyurl=True, must be a random site.
        opener = u2.build_opener(u2.HTTPCookieProcessor(),GZipProcessor())
        data = opener.open(url).read()

    # kludge because I don't see it on enough sites to be worth generalizing yet.
    restrictsearch=None
    if 'scarvesandcoffee.net' in url:
        restrictsearch=('div',{'id':'mainpage'})

    return get_urls_from_html(data,url,configuration,normalize,restrictsearch)
Example no. 15
def get_urls_from_html(data,url=None,configuration=None,normalize=False,restrictsearch=None):

    normalized = [] # normalized url
    retlist = [] # orig urls.
    
    if not configuration:
        configuration = Configuration("test1.com","EPUB")

    soup = BeautifulSoup(data)
    if restrictsearch:
        soup = soup.find(*restrictsearch)
        #print("restrict search:%s"%soup)
    
    for a in soup.findAll('a'):
        if a.has_key('href'):
            #print("a['href']:%s"%a['href'])
            href = form_url(url,a['href'])
            #print("1 urlhref:%s"%href)
            # this (should) catch normal story links, some javascript
            # 'are you old enough' links, and 'Report This' links.
            # The 'normalized' set prevents duplicates.
            if 'story.php' in a['href']:
                #print("trying:%s"%a['href'])
                m = re.search(r"(?P<sid>(view)?story\.php\?(sid|psid|no|story|stid)=\d+)",a['href'])
                if m is not None:
                    href = form_url(a['href'] if '//' in a['href'] else url,
                                    m.group('sid'))
                    
            try:
                href = href.replace('&index=1','')
                #print("2 urlhref:%s"%href)
                adapter = adapters.getAdapter(configuration,href)
                #print("found adapter")
                if adapter.story.getMetadata('storyUrl') not in normalized:
                    normalized.append(adapter.story.getMetadata('storyUrl'))
                    retlist.append(href)
            except Exception, e:
                #print(e)
                pass

    if normalize:
        return normalized
    else:
        return retlist
Example no. 16
def get_urls_from_html(data,url=None,configuration=None,normalize=False,restrictsearch=None):
    urls = collections.OrderedDict()

    if not configuration:
        configuration = Configuration(["test1.com"],"EPUB",lightweight=True)

    ## soup and re-soup because BS4/html5lib is more forgiving of
    ## incorrectly nested tags that way.
    soup = BeautifulSoup(unicode(BeautifulSoup(data,"html5lib")),"html5lib")
    if restrictsearch:
        soup = soup.find(*restrictsearch)
        #logger.debug("restrict search:%s"%soup)
    
    for a in soup.findAll('a'):
        if a.has_attr('href'):
            #logger.debug("a['href']:%s"%a['href'])
            href = form_url(url,a['href'])
            #logger.debug("1 urlhref:%s"%href)
            # this (should) catch normal story links, some javascript
            # 'are you old enough' links, and 'Report This' links.
            if 'story.php' in a['href']:
                #logger.debug("trying:%s"%a['href'])
                m = re.search(r"(?P<sid>(view)?story\.php\?(sid|psid|no|story|stid)=\d+)",a['href'])
                if m is not None:
                    href = form_url(a['href'] if '//' in a['href'] else url,
                                    m.group('sid'))
                    
            try:
                href = href.replace('&index=1','')
                #logger.debug("2 urlhref:%s"%href)
                adapter = adapters.getAdapter(configuration,href)
                #logger.debug("found adapter")
                if adapter.story.getMetadata('storyUrl') not in urls:
                    urls[adapter.story.getMetadata('storyUrl')] = [href]
                else:
                    urls[adapter.story.getMetadata('storyUrl')].append(href)
            except Exception, e:
                #logger.debug(e)
                pass

    # If not normalizing, simply return the longest URL on the assumption
    # that it contains the most user-readable metadata
    return urls.keys() if normalize else [max(value, key=len) for key, value in urls.items()]
Example no. 17
    def syncDevices(self, domoticz_devices_by_name, bridge_devices,
                    delete_removed_devices):
        bridge_devices_by_name = {x['name']: x for x in bridge_devices}

        # Add devices which are not in gBridge yet
        for name, device in domoticz_devices_by_name.items():
            if name not in bridge_devices_by_name:
                adapter = getAdapter(device)
                if adapter is None:
                    Domoticz.Error('No gBridge adapter found for device: ' +
                                   str(device))
                    continue

                traits = adapter.getTraits()
                bridge_type = adapter.getBridgeType(device)
                prefix = device['idx']
                self.createDevice(name, bridge_type, traits, prefix)

        # Remove devices in gBridge which are no longer in Domoticz
        if delete_removed_devices:
            for device in bridge_devices:
                if device['name'] not in domoticz_devices_by_name:
                    self.deleteDevice(device['device_id'])
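
Example no. 17 diffs the two device sets by name: anything in Domoticz but
not in gBridge gets created, and anything in gBridge but not in Domoticz
gets deleted. A small sketch of that diff using invented device records:

domoticz_devices_by_name = {'Lamp': {'idx': '12'}, 'Fan': {'idx': '13'}}
bridge_devices = [{'name': 'Lamp', 'device_id': 1},
                  {'name': 'Heater', 'device_id': 9}]

bridge_devices_by_name = {x['name']: x for x in bridge_devices}
to_create = [n for n in domoticz_devices_by_name if n not in bridge_devices_by_name]
to_delete = [d['device_id'] for d in bridge_devices
             if d['name'] not in domoticz_devices_by_name]
print(to_create)  # -> ['Fan']
print(to_delete)  # -> [9]
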
Example no. 18
def get_urls_from_text(data,configuration=None,normalize=False,email=False):
    urls = collections.OrderedDict()
    try:
        data = unicode(data)
    except UnicodeDecodeError:
        data=data.decode('utf8') ## for when called outside calibre.

    if not configuration:
        configuration = Configuration(["test1.com"],"EPUB",lightweight=True)

    for href in re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', data):
        href = cleanup_url(href,email)
        try:
            adapter = adapters.getAdapter(configuration,href)
            if adapter.story.getMetadata('storyUrl') not in urls:
                urls[adapter.story.getMetadata('storyUrl')] = [href]
            else:
                urls[adapter.story.getMetadata('storyUrl')].append(href)
        except Exception:
            pass

    # If not normalizing, simply return the longest URL on the assumption
    # that it contains the most user-readable metadata
    return urls.keys() if normalize else [max(value, key=len) for key, value in urls.items()]